diff options
Diffstat (limited to 'compiler/luci-interpreter/src')
359 files changed, 30432 insertions, 2141 deletions
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp new file mode 100644 index 000000000..14bc75efe --- /dev/null +++ b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/BuddyMemoryManager.h" + +namespace luci_interpreter +{ + +BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize) +{ + int32_t p = lowerLog2(memSize); + + // We assume that the requested size of memory does not exceed 4 GB + assert(p < 32); + memSize = 1 << p; + + _start_block = reinterpret_cast<Block *>(memory_start); + _start_block->size = memSize - sizeof(Block); + _start_block->is_free = true; + _start_block->self = _start_block; + _num_blocks = 0; + _size = _start_block->size; + + for (auto &_free_block : _free_blocks) + _free_block = nullptr; + + addToBlocks(_start_block, p); +} + +void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + const size_t element_size = getDataTypeSize(tensor.element_type()); + const int32_t num_elements = tensor.shape().num_elements(); + auto size = num_elements * element_size; + auto footprint = size + sizeof(Block); + auto l = (footprint & (footprint - 1)) == 0 + ? 
lowerLog2(footprint) + : lowerLog2(footprint) + 1; // check footprint is pow_of_2 + + while (l < 32 && !_free_blocks[l]) + l++; + + if (l >= 32) + { + throw std::runtime_error{"Memory limit exceeded"}; + } + + Block *tmp; + tmp = _free_blocks[l]; + removeFromBlocks(tmp, l); + + while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block)) + { + divideBlock(tmp, l); + l--; + } + + tmp->is_free = false; + tmp->self = tmp; + _num_blocks++; + + auto *data = (uint8_t *)(tmp + 1); + tensor.set_data_buffer(data); +} + +void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + auto data = tensor.data<void>(); + auto *tmp = (Block *)((uint8_t *)data - sizeof(Block)); + + assert(tmp->self == tmp); + + tmp->is_free = true; + addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block))); + + while (tmp) + if (tmp->size == _size) + break; + else + tmp = mergeBlock(tmp); + + _num_blocks--; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp new file mode 100644 index 000000000..29fb767b7 --- /dev/null +++ b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/BuddyMemoryManager.h" +#include <gtest/gtest.h> + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +TEST(BuddyMemoryManager, basic) +{ + auto mem_pool = std::make_unique<uint8_t[]>(200); + auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130); + Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor"); + + buddy_memory_manager->allocate_memory(first_tensor); + + uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + + first_tensor.writeData(data_1, 8); + uint8_t array_1[8]; + first_tensor.readData(array_1, 8); + for (int i = 0; i < 8; i++) + { + EXPECT_EQ(data_1[i], array_1[i]); + } + + Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor"); + buddy_memory_manager->allocate_memory(second_tensor); + + uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}}; + second_tensor.writeData(data_2, 10); + + uint8_t array_2[2][5]; + second_tensor.readData(array_2, 10); + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 5; j++) + { + EXPECT_EQ(data_2[i][j], array_2[i][j]); + } + } + + buddy_memory_manager->release_memory(first_tensor); + EXPECT_EQ(first_tensor.data<void>(), nullptr); + + buddy_memory_manager->release_memory(second_tensor); + EXPECT_EQ(second_tensor.data<void>(), nullptr); +} + +} // namespace +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt index 47b68fa40..997b75a84 100644 --- a/compiler/luci-interpreter/src/CMakeLists.txt +++ b/compiler/luci-interpreter/src/CMakeLists.txt @@ -1,41 +1,61 @@ -nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET) -nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) -nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET) -nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET) +include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake") -if (NOT TensorFlowSource_FOUND) - 
message(STATUS "Skipping luci-interpreter: TensorFlow not found") - return() -endif () +initialize_pal() -if (NOT TensorFlowGEMMLowpSource_FOUND) - message(STATUS "Skipping luci-interpreter: gemmlowp not found") +if (NOT PAL_INITIALIZED) + message("PAL Failed to initialize, skip luci-interpreter") return() -endif () +endif() -if (NOT TensorFlowEigenSource_FOUND) - message(STATUS "Skipping luci-interpreter: Eigen not found") - return() -endif () +message(STATUS "LUCI INTERPRETER BEGIN") -if (NOT TensorFlowRuySource_FOUND) - message(STATUS "Skipping luci-interpreter: Ruy not found") - return() -endif () +set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}") add_subdirectory(core) +message(STATUS "LUCI INTERPRETER CORE") add_subdirectory(kernels) +message(STATUS "LUCI INTERPRETER KERNELS") add_subdirectory(loader) +message(STATUS "LUCI INTERPRETER LOADER") +add_subdirectory(import) +message(STATUS "LUCI INTERPRETER IMPORT") + +message(STATUS "LUCI INTERPTER INITALIZED") set(SOURCES "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" - Interpreter.cpp) + Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp) + +if (NOT LUCI_INTERPRETER_STATIC) + add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES}) +else () + add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES}) 
+endif () -add_library(luci_interpreter SHARED ${SOURCES}) -target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") -target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter - PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core +set(TEST_SOURCES BuddyMemoryManager.test.cpp) + +target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_BINARY} + PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) -install(TARGETS luci_interpreter DESTINATION lib) +install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib) +install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(buddy_manager_test ${TEST_SOURCES}) +target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY}) diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp index 639ffc1f0..8cf272efd 100644 --- a/compiler/luci-interpreter/src/Interpreter.cpp +++ b/compiler/luci-interpreter/src/Interpreter.cpp @@ -15,6 +15,7 @@ */ #include "luci_interpreter/Interpreter.h" +#include "luci_interpreter/SimpleMemoryManager.h" #include "loader/ModuleLoader.h" @@ -31,7 +32,7 @@ class EventNotifierImpl final : public EventNotifier public: EventNotifierImpl(const RuntimeToIR &runtime_to_ir, const std::vector<ExecutionObserver *> &observers) - : _runtime_to_ir(runtime_to_ir), _observers(observers) + : _runtime_to_ir(runtime_to_ir), _observers(observers) { } @@ -74,7 +75,25 @@ Interpreter::Interpreter(const luci::Module *module) _runtime_to_ir = std::make_unique<RuntimeToIR>(); _event_notifier = 
std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); - ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor); + + _default_memory_manager = std::make_unique<SimpleMemoryManager>(); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + _default_memory_manager.get()); + loader.load(); +} + +Interpreter::Interpreter(const luci::Module *module, + luci_interpreter::IMemoryManager *memory_manager) +{ + assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead"); + + _runtime_to_ir = std::make_unique<RuntimeToIR>(); + _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + memory_manager); loader.load(); } diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp new file mode 100644 index 000000000..230e39896 --- /dev/null +++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/SimpleMemoryManager.h" + +namespace luci_interpreter +{ + +void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + tensor.set_data_buffer(data); +} + +void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_data_allocated()) + { + tensor.set_data_buffer(nullptr); + return; + } + auto data = tensor.data<uint8_t>(); + delete[] data; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-interpreter/src/StaticMemoryManager.cpp new file mode 100644 index 000000000..73a819919 --- /dev/null +++ b/compiler/luci-interpreter/src/StaticMemoryManager.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/StaticMemoryManager.h" + +namespace luci_interpreter +{ + +void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + int32_t offset = tensor.get_offset(); + assert(offset >= 0); + auto tensor_ptr = _buffer_ptr + offset; + tensor.set_data_buffer(tensor_ptr); +} + +void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-interpreter/src/TestMemoryManager.cpp new file mode 100644 index 000000000..3beeee55c --- /dev/null +++ b/compiler/luci-interpreter/src/TestMemoryManager.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ + +void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + allocations.push_back(data); + tensor.set_data_buffer(data); +} + +void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt index e576dbd94..c2471e01c 100644 --- a/compiler/luci-interpreter/src/core/CMakeLists.txt +++ b/compiler/luci-interpreter/src/core/CMakeLists.txt @@ -9,9 +9,11 @@ set(SOURCES RuntimeModule.h Tensor.cpp) -add_library(luci_interpreter_core STATIC ${SOURCES}) -set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") -target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter_core PUBLIC luci_lang) -target_link_libraries(luci_interpreter_core PRIVATE nncc_common) +add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang) +target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) diff 
--git a/compiler/luci-interpreter/src/core/Kernel.h b/compiler/luci-interpreter/src/core/Kernel.h index 5f5efb219..a7c4a4218 100644 --- a/compiler/luci-interpreter/src/core/Kernel.h +++ b/compiler/luci-interpreter/src/core/Kernel.h @@ -29,15 +29,15 @@ class Kernel { protected: Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs) - : _inputs(std::move(inputs)), _outputs(std::move(outputs)) + : _inputs(std::move(inputs)), _outputs(std::move(outputs)) { } public: virtual ~Kernel() = default; - std::vector<const Tensor *> getInputTensors() const { return _inputs; } - std::vector<Tensor *> getOutputTensors() const { return _outputs; } + const std::vector<const Tensor *> &getInputTensors() const { return _inputs; } + const std::vector<Tensor *> &getOutputTensors() const { return _outputs; } // Configures the kernel. // This function is currently called once for each kernel during interpreter construction, @@ -59,7 +59,7 @@ template <typename Params> class KernelWithParams : public Kernel protected: KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, const Params ¶ms) - : Kernel(std::move(inputs), std::move(outputs)), _params(params) + : Kernel(std::move(inputs), std::move(outputs)), _params(params) { } diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h index 65d119761..4ddbcefb8 100644 --- a/compiler/luci-interpreter/src/core/KernelParams.h +++ b/compiler/luci-interpreter/src/core/KernelParams.h @@ -19,6 +19,7 @@ #include <luci/IR/AttrPadding.h> #include <luci/IR/AttrFusedActFunc.h> +#include <luci/IR/AttrMirrorPadMode.h> #include <luci_interpreter/core/DataType.h> #include <cstdint> @@ -30,6 +31,7 @@ namespace luci_interpreter // Inject commonly used types into `luci_interpreter` namespace for convenience. 
using Activation = luci::FusedActFunc; using Padding = luci::Padding; +using MirrorPadMode = luci::MirrorPadMode; struct AddParams { @@ -41,9 +43,16 @@ struct ArgMaxParams DataType output_type; }; +struct BatchMatMulParams +{ + bool adj_x; + bool adj_y; +}; + struct ConcatenationParams { int axis; + Activation activation; }; struct Conv2DParams @@ -72,9 +81,32 @@ struct DepthwiseConv2DParams Activation activation; }; +struct DivParams +{ + Activation activation; +}; + struct FullyConnectedParams { Activation activation; + bool keep_num_dims = false; +}; + +struct GatherParams +{ + int32_t axis; + int32_t batch_dims; +}; + +struct GeluParams +{ + bool approximate; +}; + +struct InstanceNormParams +{ + float epsilon; + Activation activation; }; struct L2NormParams @@ -95,11 +127,27 @@ struct LocalResponseNormalizationParams float beta; }; +struct MirrorPadParams +{ + MirrorPadMode mode; +}; + struct MulParams { Activation activation; }; +struct OneHotParams +{ + int32_t axis; +}; + +struct PackParams +{ + int32_t values_count; + int32_t axis; +}; + struct Pool2DParams { Padding padding; @@ -115,6 +163,35 @@ struct ReducerParams bool keep_dims; }; +struct ResizeBilinearParams +{ + bool align_corners; + bool half_pixel_centers; +}; + +struct ResizeNearestNeighborParams +{ + bool align_corners; + bool half_pixel_centers; +}; + +struct ShapeParams +{ + loco::DataType out_type; +}; + +struct SubParams +{ + Activation activation; +}; + +struct SVDFParams +{ + bool asymmetric_quantize_inputs; + int32_t svdf_rank; + Activation activation; +}; + struct SpaceToDepthParams { int block_size; @@ -144,6 +221,16 @@ struct TransposeConvParams Padding padding; int32_t stride_height; int32_t stride_width; + Activation activation; +}; + +struct UnidirectionalSequenceLSTMParams +{ + Activation activation; + float cell_clip; + float proj_clip; + bool time_major; + bool asymmetric_quantize_inputs; }; struct UnpackParams diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp 
b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp index 06f0fed15..c2f8d2ea8 100644 --- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp +++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp @@ -19,10 +19,102 @@ #include "core/RuntimeModule.h" #include <algorithm> +#include <unordered_map> namespace luci_interpreter { +class RuntimeGraph::TensorAllocPlan +{ + std::vector<std::vector<Tensor *>> _alloc_plan; + std::vector<std::vector<Tensor *>> _dealloc_plan; + bool _valid = false; + IMemoryManager *_memory_manager; + +public: + explicit TensorAllocPlan(IMemoryManager *memory_manager); + void invalidate() { _valid = false; } + bool isValid() const { return _valid; } + void build(const RuntimeGraph &graph); + void allocate(size_t kernel_index) const; + void deallocate(size_t kernel_index) const; +}; + +RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager) + : _memory_manager(memory_manager) +{ +} + +void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph) +{ + invalidate(); + using Lifetime = std::pair<size_t, size_t>; + std::unordered_map<Tensor *, Lifetime> lifetimes; + const size_t num_kernels = graph._kernels.size(); + for (size_t index = 0; index < num_kernels; ++index) + { + const auto &kernel = graph._kernels[index]; + for (const Tensor *tensor : kernel->getInputTensors()) + { + auto nc_tensor = const_cast<Tensor *>(tensor); + if (lifetimes.count(nc_tensor) > 0) + lifetimes.at(nc_tensor).second = index; + } + for (Tensor *tensor : kernel->getOutputTensors()) + { + assert(lifetimes.count(tensor) == 0); + lifetimes[tensor] = Lifetime(index, index); + } + } + for (const Tensor *tensor : graph.getOutputTensors()) + { + auto nc_tensor = const_cast<Tensor *>(tensor); + if (lifetimes.count(nc_tensor) > 0) + lifetimes.at(nc_tensor).second = num_kernels; + } + _alloc_plan.assign(num_kernels, std::vector<Tensor *>()); + _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>()); + for (const auto &item : lifetimes) + 
{ + _alloc_plan[item.second.first].push_back(item.first); + _dealloc_plan[item.second.second].push_back(item.first); + } + _valid = true; +} + +void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const +{ + assert(_valid && kernel_index < _alloc_plan.size()); + for (Tensor *tensor : _alloc_plan[kernel_index]) + { + _memory_manager->allocate_memory(*tensor); + } +} + +void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const +{ + assert(_valid && kernel_index < _dealloc_plan.size()); + for (Tensor *tensor : _dealloc_plan[kernel_index]) + { + _memory_manager->release_memory(*tensor); + } +} + +RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager) + : _owning_module(owning_module), _memory_manager(memory_manager), + _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager)) +{ +} + +RuntimeGraph::~RuntimeGraph() +{ + for (auto &tensor : _tensors) + { + if (tensor->is_data_allocated()) + _memory_manager->release_memory(*tensor); + } +} + Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor) { assert(tensor != nullptr); @@ -44,14 +136,23 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors) _output_tensors = output_tensors; } +void RuntimeGraph::configureAllocations(Tensor *tensor) +{ + _memory_manager->allocate_memory(*tensor); +} + void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel) { assert(kernel != nullptr); _kernels.push_back(std::move(kernel)); + _tensor_alloc_plan->invalidate(); } void RuntimeGraph::execute() const { + if (!_tensor_alloc_plan->isValid()) + _tensor_alloc_plan->build(*this); + EventNotifier *event_notifier = _owning_module->getEventNotifier(); // Notify the observers that the input tensors have changed. 
@@ -59,12 +160,14 @@ void RuntimeGraph::execute() const { for (const Tensor *input_tensor : getInputTensors()) { - event_notifier->postTensorWrite(input_tensor); + if (input_tensor->is_observable()) + event_notifier->postTensorWrite(input_tensor); } } - for (const auto &kernel : _kernels) + for (size_t index = 0; index < _kernels.size(); ++index) { + const auto &kernel = _kernels[index]; if (event_notifier != nullptr) { event_notifier->preOperatorExecute(kernel.get()); @@ -73,6 +176,10 @@ void RuntimeGraph::execute() const // TODO The `configure` method should only be called if the outputs of an operator need to be // resized. kernel->configure(); + + // Preallocate outputs in advance instead of relying on automatic allocation + _tensor_alloc_plan->allocate(index); + kernel->execute(); if (event_notifier != nullptr) @@ -82,11 +189,12 @@ void RuntimeGraph::execute() const for (const Tensor *tensor : kernel->getOutputTensors()) { - if (event_notifier != nullptr) + if (event_notifier != nullptr && tensor->is_observable()) { event_notifier->postTensorWrite(tensor); } } + _tensor_alloc_plan->deallocate(index); } } diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-interpreter/src/core/RuntimeGraph.h index 6ddbea4e9..8184e249d 100644 --- a/compiler/luci-interpreter/src/core/RuntimeGraph.h +++ b/compiler/luci-interpreter/src/core/RuntimeGraph.h @@ -18,6 +18,7 @@ #define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H #include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" #include "core/Kernel.h" #include <memory> @@ -30,14 +31,21 @@ class RuntimeModule; class RuntimeGraph { +private: + class TensorAllocPlan; + friend class TensorAllocPlan; + public: - explicit RuntimeGraph(RuntimeModule *owning_module) : _owning_module(owning_module) {} + explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager); + ~RuntimeGraph(); Tensor *addTensor(std::unique_ptr<Tensor> &&tensor); void setInputTensors(const 
std::vector<Tensor *> &input_tensors); void setOutputTensors(const std::vector<Tensor *> &output_tensors); + void configureAllocations(Tensor *tensor); + const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; } const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; } @@ -46,6 +54,7 @@ public: void execute() const; private: + IMemoryManager *_memory_manager; RuntimeModule *_owning_module; std::vector<std::unique_ptr<Tensor>> _tensors; std::vector<Tensor *> _input_tensors; @@ -53,6 +62,8 @@ private: // Kernels in execution order. std::vector<std::unique_ptr<Kernel>> _kernels; + // Tensors that are not used anymore after given op + std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-interpreter/src/core/RuntimeModule.h index dccc3a173..78873b0ec 100644 --- a/compiler/luci-interpreter/src/core/RuntimeModule.h +++ b/compiler/luci-interpreter/src/core/RuntimeModule.h @@ -19,6 +19,7 @@ #include "core/RuntimeGraph.h" #include "core/EventNotifier.h" +#include "luci_interpreter/MemoryManager.h" #include <memory> #include <vector> @@ -33,9 +34,9 @@ public: EventNotifier *getEventNotifier() const { return _event_notifier; } - RuntimeGraph *addGraph() + RuntimeGraph *addGraph(IMemoryManager *memory_manager) { - _graphs.push_back(std::make_unique<RuntimeGraph>(this)); + _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager)); return _graphs.back().get(); } diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp index 4fe7479e5..3c3c5ffff 100644 --- a/compiler/luci-interpreter/src/core/Tensor.cpp +++ b/compiler/luci-interpreter/src/core/Tensor.cpp @@ -24,12 +24,9 @@ namespace luci_interpreter Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name) - : _element_type(element_type), _shape(std::move(shape)), 
_quantization(std::move(quantization)), - _name(std::move(name)) + : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)), + _name(std::move(name)), _data_allocated(false) { - const size_t element_size = getDataTypeSize(_element_type); - const int32_t num_elements = _shape.num_elements(); - _data = std::make_unique<uint8_t[]>(num_elements * element_size); } void Tensor::readData(void *data_ptr, size_t data_size) const @@ -56,13 +53,6 @@ void Tensor::writeData(const void *data_ptr, size_t data_size) std::memcpy(data<void>(), data_ptr, data_size); } -void Tensor::resize(const Shape &new_shape) -{ - _shape = new_shape; - const size_t element_size = getDataTypeSize(_element_type); - const int32_t num_elements = _shape.num_elements(); - // NOTE: _data can be nullptr for empty tensors - _data = std::make_unique<uint8_t[]>(num_elements * element_size); -} +void Tensor::resize(const Shape &new_shape) { _shape = new_shape; } } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt new file mode 100644 index 000000000..dd9733f92 --- /dev/null +++ b/compiler/luci-interpreter/src/import/CMakeLists.txt @@ -0,0 +1,15 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h" + GraphBuilderRegistry.cpp) + +# include specific builders +file(GLOB_RECURSE NODES "Nodes/*") +list(APPEND SOURCES ${NODES}) + +add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) + +target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import) diff --git a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp 
new file mode 100644 index 000000000..a33bca6a4 --- /dev/null +++ b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "luci_interpreter/GraphBuilderRegistry.h" +#include "Nodes/CircleReferencingConst.h" + +namespace luci_interpreter +{ + +std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying() +{ + auto builder = std::make_unique<luci::GraphBuilderRegistry>(); + { + // redefine NodeBuilder of BUFFER type + builder->add(std::make_unique<CircleReferencingConstNodeBuilder>()); + } + + return builder; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp new file mode 100644 index 000000000..14e90f240 --- /dev/null +++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CircleReferencingConst.h" + +#include <vector> + +namespace +{ + +// helper struct which describes data loaded to custom_options of CircleReferencingConst node +struct ConstDataReference +{ + const uint8_t *data = nullptr; + uint32_t size = 0; +}; + +} // namespace + +namespace luci_interpreter +{ +using namespace luci; + +CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index, + GraphBuilderContext *context) const +{ + assert(tensor_index >= 0); + + const auto graph = context->graph(); + const auto reader = context->reader(); + const auto tensors = reader->tensors(); + auto const const_tensor = tensors[tensor_index]; + assert(const_tensor != nullptr); + if (const_tensor->is_variable()) + { + // Create CircleVariable for variable + return nullptr; + } + + auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data()); + auto const const_dims = wrap(const_tensor->shape()); // in NHWC + if (const_dims.empty() && buffer.empty()) + { + // unknown shape tensor and scalar tensor + return nullptr; + } + + // if tensor_index is used as output to some other operator, this is not a constant + auto tensoroutputs = context->tensoroutputs(); + if (tensoroutputs->find(tensor_index)) + { + // other operator output tensor + return nullptr; + } + + uint32_t num_elements = 1; + for (uint32_t r = 0; r < const_dims.size(); ++r) + { + num_elements = num_elements * const_dims[r]; + } + + if (buffer.empty() && num_elements > 0) + { + // normal empty tensor + return nullptr; + } + + // create CircleReferencingConst + 
auto custom_node = graph->nodes()->create<CircleCustom>(0, 1); + { + custom_node->custom_code("CircleReferencingConst"); + + copy_tensor_attributes(const_tensor, custom_node); + custom_node->shape_status(luci::ShapeStatus::VALID); + + // custom options stores size of buffer and pointer's value to buffer's data + { + std::vector<uint8_t> custom_options(sizeof(ConstDataReference)); + { + auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data()); + const_data_ref = {buffer.data(), buffer.size()}; + } + custom_node->custom_options(custom_options); + } + } + + // Output of CircleCustom node presented with CircleConstNode + auto out_node = graph->nodes()->create<CircleCustomOut>(); + { + out_node->index(0); + out_node->input(custom_node); + + copy_tensor_attributes(const_tensor, out_node); + out_node->shape_status(luci::ShapeStatus::VALID); + } + + return out_node; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h new file mode 100644 index 000000000..ed8f95124 --- /dev/null +++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ +#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ + +#include <luci/Import/NodeBuilder.h> + +#include <luci/IR/Nodes/CircleConst.h> + +namespace luci_interpreter +{ +using namespace luci; + +/** + * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer. + */ +class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER> +{ +public: + CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final; +}; + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ diff --git a/compiler/luci-interpreter/src/kernels/Abs.cpp b/compiler/luci-interpreter/src/kernels/Abs.cpp new file mode 100644 index 000000000..5c6331501 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Abs.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Abs.h" + +#include "kernels/Utils.h" + +#include <cmath> // abs for float + +namespace luci_interpreter +{ +namespace kernels +{ + +Abs::Abs(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Abs::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void Abs::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + eval<float>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Abs::eval() const +{ + const auto *input_data = input()->data<T>(); + auto *output_data = output()->data<T>(); + + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + + for (int i = 0; i < size; ++i) + { + output_data[i] = std::abs(input_data[i]); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Abs.h b/compiler/luci-interpreter/src/kernels/Abs.h new file mode 100644 index 000000000..b5b874a99 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Abs.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_ABS_H +#define LUCI_INTERPRETER_KERNELS_ABS_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Abs : public Kernel +{ +public: + Abs(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void eval() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ABS_H diff --git a/compiler/luci-interpreter/src/kernels/Abs.test.cpp b/compiler/luci-interpreter/src/kernels/Abs.test.cpp new file mode 100644 index 000000000..2c42ab75c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Abs.test.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Abs.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T> input_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Abs kernel(&input_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(AbsTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, -1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, 1.0f, 2.0f, // Row 2 + }); + + SUCCEED(); +} + +TEST(AbsTest, Type_Mismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<loco::DataType::S32>({3}, {1, -3, 2}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Abs kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp index 9ed155e94..d7bf3084f 100644 --- 
a/compiler/luci-interpreter/src/kernels/Add.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.cpp @@ -17,6 +17,7 @@ #include "kernels/Add.h" +#include "kernels/BinaryOpCommon.h" #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/add.h> @@ -30,16 +31,22 @@ namespace kernels { Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams ¶ms) - : KernelWithParams<AddParams>({input1, input2}, {output}, params) + : KernelWithParams<AddParams>({input1, input2}, {output}, params) { } void Add::configure() { - if (input1()->element_type() != input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + if (input1()->element_type() == DataType::S16) { - throw std::runtime_error("Input Tensor Data Type Mismatch."); + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1); + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); } + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); } @@ -50,9 +57,18 @@ void Add::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalQuantizedS16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -60,22 +76,17 @@ void Add::execute() const void Add::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); - tflite::ArithmeticParams params{}; - params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<float>(params, _params.activation); const bool 
need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); + getTensorShape(input1()), getTensorShape(input2()), ¶ms); if (need_broadcast) { tflite::reference_ops::BroadcastAdd4DSlow( - params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), - getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); } else { @@ -85,6 +96,28 @@ void Add::evalFloat() const } } +template <typename T> void Add::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastAdd4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + void Add::evalQuantized() const { const auto input1_scale = static_cast<double>(input1()->scale()); @@ -123,14 +156,13 @@ void Add::evalQuantized() const params.quantized_activation_max = activation_max; const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); + getTensorShape(input1()), getTensorShape(input2()), ¶ms); if (need_broadcast) { tflite::reference_ops::BroadcastAdd4DSlow( - params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), - 
getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); } else { @@ -140,5 +172,49 @@ void Add::evalQuantized() const } } +void Add::evalQuantizedS16() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + constexpr int left_shift = 12; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = [input1_multiplier, input1_shift, // + input2_multiplier, input2_shift, // + output_multiplier, output_shift, // + activation_min, activation_max](int16_t input1_val, int16_t input2_val) { + const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift; + const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift; + const int32_t scaled_input1_val = 
tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, input1_multiplier, input1_shift); + const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, input2_multiplier, input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, output_multiplier, output_shift); + const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output)); + return static_cast<int16_t>(clamped_output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()), + getTensorShape(input2()), getTensorData<int16_t>(input2()), + getTensorShape(output()), getTensorData<int16_t>(output()), fn); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h index a1f7e0406..91d95b6af 100644 --- a/compiler/luci-interpreter/src/kernels/Add.h +++ b/compiler/luci-interpreter/src/kernels/Add.h @@ -39,7 +39,9 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; void evalQuantized() const; + void evalQuantizedS16() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp index 705b648c8..b8b1c3089 100644 --- a/compiler/luci-interpreter/src/kernels/Add.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Add.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,6 +28,14 @@ namespace using namespace testing; +class AddTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + // for quantized 
Add, the error shouldn't exceed step float GetTolerance(float min, float max) { @@ -34,108 +43,94 @@ float GetTolerance(float min, float max) return kQuantizedStep; } -TEST(AddTest, Uint8) +TEST_F(AddTest, Uint8) { std::initializer_list<int32_t> base_shape = {2, 3, 1, 2}; std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; std::initializer_list<int32_t> test_shapes[] = { - {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; std::initializer_list<int32_t> output_shapes[] = { - {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; std::vector<std::vector<float>> output_data = { - {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, - 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f, - -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f}, - {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f}, - {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, - 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f, - -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f}, - {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}}; + {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f, + -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f}, + {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f}, + {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f, + -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 
1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f}, + {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}}; float kQuantizedTolerance = GetTolerance(-3.f, 3.f); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor{ - getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""}; - Tensor input2_tensor{ - getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""}; - std::vector<uint8_t> quantized_input1_value = - quantize<uint8_t>(base_data, quant_param.first, quant_param.second); - std::vector<uint8_t> quantized_input2_value = - quantize<uint8_t>(test_data, quant_param.first, quant_param.second); - input1_tensor.writeData(quantized_input1_value.data(), - quantized_input1_value.size() * sizeof(uint8_t)); - input2_tensor.writeData(quantized_input2_value.data(), - quantized_input2_value.size() * sizeof(uint8_t)); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); Tensor output_tensor = - makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); AddParams params{}; params.activation = Activation::NONE; Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_tensor.scale(), output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], 
kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); } // Re-run with exchanged inputs. for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor{ - getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""}; - Tensor input2_tensor{ - getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""}; - std::vector<uint8_t> quantized_input1_value = - quantize<uint8_t>(test_data, quant_param.first, quant_param.second); - std::vector<uint8_t> quantized_input2_value = - quantize<uint8_t>(base_data, quant_param.first, quant_param.second); - input1_tensor.writeData(quantized_input1_value.data(), - quantized_input1_value.size() * sizeof(uint8_t)); - input2_tensor.writeData(quantized_input2_value.data(), - quantized_input2_value.size() * sizeof(uint8_t)); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); Tensor output_tensor = - makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); AddParams params{}; params.activation = Activation::NONE; Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_tensor.scale(), output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); 
} } -TEST(AddTest, Float) +TEST_F(AddTest, Float) { Shape base_shape = {2, 3, 1, 2}; std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; std::vector<std::vector<float>> test_outputs = { - {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, - 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, - 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, - {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, - {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, - 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, - 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, - {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + 
makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -143,17 +138,19 @@ TEST(AddTest, Float) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) - << "With shape number " << i; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; } // Re-run with exchanged inputs. for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -161,18 +158,150 @@ TEST(AddTest, Float) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) - << "With shape number " << i; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; } } -TEST(AddTest, Input_Output_Type_NEG) +template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); 
+ using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1, + 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7}, + {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7}, + {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0, + 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7}, + {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(AddTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<int32_t>> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<std::vector<float>> ref_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + + for (size_t i = 0; i < test_shapes.size(); ++i) 
+ { + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(AddTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + 
Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -182,20 +311,47 @@ TEST(AddTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(AddTest, Invalid_Input_Type_NEG) +TEST_F(AddTest, Invalid_Output_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); - Tensor output_tensor = makeOutputTensor(DataType::S64); + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); AddParams params{}; params.activation = Activation::RELU; Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } +TEST_F(AddTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace 
luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.cpp index 5c464ed09..6561a1783 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.cpp @@ -16,7 +16,7 @@ #include "kernels/ArgMax.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALArgMax.h" namespace luci_interpreter { @@ -24,7 +24,7 @@ namespace kernels { ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams ¶ms) - : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params) + : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params) { } @@ -60,11 +60,10 @@ void ArgMax::configure() void ArgMax::execute() const { -#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ - tflite::optimized_ops::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \ - getTensorData<axis_type>(axis()), getTensorShape(output()), \ - getTensorData<output_type>(output()), \ - std::greater<data_type>()) +#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ + luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \ + getTensorData<axis_type>(axis()), getTensorShape(output()), \ + getTensorData<output_type>(output()), std::greater<data_type>()) if (axis()->element_type() == DataType::S32) { switch (_params.output_type) diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp index 2ab7ff0da..474f4b321 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -16,6 +16,7 @@ #include "kernels/ArgMax.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,18 +33,19 @@ void Check(std::initializer_list<int32_t> input_shape, 
std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data, std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data) { - - Tensor input_tensor{getElementType<T1>(), input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T1)); - Tensor dimension_tensor{DataType::S32, dimension_shape, {}, ""}; - dimension_tensor.writeData(dimension_data.begin(), dimension_data.size() * sizeof(int32_t)); - + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T1>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor dimension_tensor = + makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<T2>()); ArgMaxParams params{}; params.output_type = getElementType<T2>(); ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -55,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(ArgMaxTest, DataTypes); +TYPED_TEST_SUITE(ArgMaxTest, DataTypes); TYPED_TEST(ArgMaxTest, Simple) { @@ -63,14 +65,14 @@ TYPED_TEST(ArgMaxTest, Simple) /*output_shape=*/{1, 1, 1}, /*input_data=*/ { - 1, 9, 7, 3, + 1, 9, 7, 3, // }, /*dimension_data=*/{3}, /*output_data=*/{1}); Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{}, /*output_shape=*/{1, 1, 1}, /*input_data=*/ { - 1, 9, 7, 3, + 1, 9, 7, 3, // }, /*dimension_data=*/{3}, /*output_data=*/{1}); } @@ -81,30 +83,37 @@ TYPED_TEST(ArgMaxTest, MultiDimensions) /*output_shape=*/{1, 1, 2}, /*input_data=*/ { - 1, 2, 7, 8, 1, 9, 7, 
3, + 1, 2, 7, 8, // + 1, 9, 7, 3, // }, /*dimension_data=*/{3}, /*output_data=*/{3, 1}); Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{}, /*output_shape=*/{1, 1, 2}, /*input_data=*/ { - 1, 2, 7, 8, 1, 9, 7, 3, + 1, 2, 7, 8, // + 1, 9, 7, 3, // }, /*dimension_data=*/{3}, /*output_data=*/{3, 1}); } TEST(ArgMaxTest, UnsupportedType_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, { - 1, 2, 7, 8, 1, 9, 7, 3, - }); - Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); + Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); ArgMaxParams params{}; params.output_type = DataType::U8; ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp index cdd81d7d6..d3bade9e4 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/pooling.h> +#include "PALAveragePool2d.h" #include <stdexcept> @@ -28,8 +28,9 @@ namespace luci_interpreter namespace kernels { -AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) - : KernelWithParams<Pool2DParams>({input}, {output}, params) +AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams ¶ms) + : KernelWithParams<Pool2DParams>({input}, {output, 
scratchpad}, params) { } @@ -50,24 +51,35 @@ void AveragePool2D::configure() const int32_t input_width = input_shape.dim(2); const int32_t depth = input_shape.dim(3); - const int32_t output_height = computeOutputSize(_params.padding, input_height, - _params.filter_height, _params.stride_height); + const int32_t output_height = + computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height); const int32_t output_width = - computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); + computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); _padding_height = - computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); + computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); _padding_width = - computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); + computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); if (input()->element_type() == DataType::U8) { - if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point()) - { - throw std::runtime_error( - "Quantization param for Input and output must be same(scale or zero-point)"); - } + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + else if (input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); } output()->resize({batches, output_height, output_width, depth}); + + auto 
scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), + getTensorShape(input()), getTensorShape(output())); } void AveragePool2D::execute() const @@ -80,6 +92,12 @@ void AveragePool2D::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalSInt16(); + break; + case DataType::S8: + evalSInt8(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -126,5 +144,51 @@ void AveragePool2D::evalQuantized() const getTensorData<uint8_t>(output())); } +void AveragePool2D::evalSInt8() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::AveragePool<int8_t>( + params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void AveragePool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + 
params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::AveragePool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), getTensorData<int16_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h index 91f212b3a..2c8fe16e7 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h @@ -28,7 +28,8 @@ namespace kernels class AveragePool2D : public KernelWithParams<Pool2DParams> { public: - AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms); + AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams ¶ms); const Tensor *input() const { return _inputs[0]; } Tensor *output() const { return _outputs[0]; } @@ -39,6 +40,8 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalSInt16() const; + void evalSInt8() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp index cc80e5e90..478bfa68e 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/AveragePool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,16 +27,26 @@ namespace using namespace testing; -TEST(AveragePool2DTest, Float) +class AveragePool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + 
std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(AveragePool2DTest, Float) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ - -4, -3, -2, -1, 0, // - 1, 2, 3, 4, 5, // - 6, 7, 8, 9, 10, // + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -45,32 +56,31 @@ TEST(AveragePool2DTest, Float) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 0, 1.5, // - 4.5, 6, // + 0, 1.5, // + 4.5, 6, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); } -TEST(AveragePool2DTest, Uint8_0) +TEST_F(AveragePool2DTest, Uint8_0) { + std::vector<float> input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, 
quant_param.second); - - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, -6, 12, 4, // - -3, -2, 10, 7, // - }, - quant_param.first, quant_param.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -80,29 +90,28 @@ TEST(AveragePool2DTest, Uint8_0) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear({0.0, 6.0}))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } -TEST(AveragePool2DTest, Uint8_1) +TEST_F(AveragePool2DTest, Uint8_1) { + std::vector<float> input_data{ + 0, 6, 12, 4, // + 3, 2, 10, 7, // + }; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, 6, 12, 4, // - 3, 2, 10, 7, // - }, - quant_param.first, quant_param.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding 
= Padding::VALID; @@ -112,26 +121,99 @@ TEST(AveragePool2DTest, Uint8_1) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear({2.75, 6.0}))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } -TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) +TEST_F(AveragePool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + std::vector<float> ref_output_data{ + 0, 1.5, // + 4.5, 6, // + }; + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(AveragePool2DTest, SInt8) +{ + 
Shape input_shape{1, 4, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5, + 7, -6, -1, 9, -2, 0, -5, 11, -1, -7}; + std::vector<float> ref_output_data{ + 0, 2.5, // + 1, 1.5, // + }; + + std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f); + Tensor input_tensor = makeInputTensor<DataType::S8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG) { Shape input_shape{1, 3, 5}; std::vector<float> input_data{ - -4, -3, -2, -1, 0, // - 1, 2, 3, 4, 5, // - 6, 7, 8, 9, 10, // + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -141,20 +223,22 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) params.stride_width = 2; 
params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(AveragePool2DTest, In_Out_Type_NEG) +TEST_F(AveragePool2DTest, In_Out_Type_NEG) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ - -4, -3, -2, -1, 0, // - 1, 2, 3, 4, 5, // - 6, 7, 8, 9, 10, // + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -164,25 +248,23 @@ TEST(AveragePool2DTest, In_Out_Type_NEG) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(AveragePool2DTest, Quant_Param_NEG) +TEST_F(AveragePool2DTest, Quant_Param_NEG) { + std::vector<float> input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; + std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f); std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f); - Tensor input_tensor{ - DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); - - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, -6, 12, 4, // - -3, -2, 10, 7, // - }, - quant_param1.first, 
quant_param1.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); Pool2DParams params{}; params.padding = Padding::VALID; @@ -192,7 +274,7 @@ TEST(AveragePool2DTest, Quant_Param_NEG) params.stride_width = 2; params.activation = Activation::RELU6; - AveragePool2D kernel(&input_tensor, &output_tensor, params); + AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params); EXPECT_ANY_THROW(kernel.configure()); } diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp new file mode 100644 index 000000000..24ca22996 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/BatchMatMul.h" +#include "kernels/Utils.h" + +#include "PALBatchMatMul.h" + +#include <tensorflow/lite/kernels/internal/reference/transpose.h> + +#include <stdexcept> + +namespace +{ + +tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape) +{ + tflite::RuntimeShape swapped_shape(shape); + const int32_t dims = shape.DimensionsCount(); + swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); + swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); + return swapped_shape; +} + +} // namespace + +namespace luci_interpreter +{ +namespace kernels +{ + +BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, + Tensor *y_tmp, const BatchMatMulParams ¶ms) + : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params) +{ +} + +void BatchMatMul::configure() +{ + auto lhs = x(); + auto rhs = y(); + auto adj_x = params().adj_x; + auto adj_y = params().adj_y; + + // TODO Support non-float types + if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32) + throw std::runtime_error("Unsupported type."); + + LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type()); + + auto lhs_rank = lhs->shape().num_dims(); + auto rhs_rank = rhs->shape().num_dims(); + LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4); + LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4); + + auto lhs_scratchpad = temp_lhs(); + auto rhs_scratchpad = temp_rhs(); + luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs), + getTensorShape(rhs)); + + auto output_rank = std::max(lhs_rank, rhs_rank); + + auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs)); + auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs)); + + // Ensure any batch dimensions obey broacasting rules. 
+ for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + if (lhs_dim != rhs_dim) + { + if (lhs_dim != 1) + { + LUCI_INTERPRETER_CHECK(rhs_dim == 1); + } + } + } + + // Ensure other dimensions work for matrix multiplication. + int accum_dim_lhs = + adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1); + int accum_dim_rhs = + adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2); + LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs); + + Shape output_shape(output_rank); + // Fill in any broadcast dimensions. + for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + int broadcast_dim = lhs_dim; + if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) + { + broadcast_dim = rhs_dim; + } + output_shape.dim(i) = broadcast_dim; + } + // Fill in the matmul dimensions. + int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; + int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1; + + output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index); + output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index); + + output()->resize(output_shape); +} + +void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out) +{ + tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in)); + tflite::RuntimeShape shape(getTensorShape(tensor_in)); + tflite::TransposeParams params; + int rank = shape.DimensionsCount(); + params.perm_count = rank; + for (int i = 0; i < rank - 2; ++i) + { + params.perm[i] = i; + } + // Transpose the last two dimensions. 
+ params.perm[rank - 2] = rank - 1; + params.perm[rank - 1] = rank - 2; + transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); + transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); + switch (tensor_in->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in), + transposed_shape, getTensorData<float>(tensor_out)); + break; + default: + throw std::runtime_error("Only suppport fp32 BatchMatMul for now."); + } +} + +void BatchMatMul::execute() const +{ + auto lhs = x(); + auto rhs = y(); + + bool adj_x = params().adj_x; + bool adj_y = params().adj_y; + + auto orig_lhs_shape = getTensorShape(lhs); + auto orig_rhs_shape = getTensorShape(rhs); + + auto rhs_tensor = adj_y ? rhs : temp_rhs(); + auto lhs_tensor = adj_x ? temp_lhs() : lhs; + if (not adj_y) + { + TransposeRowsColumns(rhs, temp_rhs()); + } + if (adj_x) + { + TransposeRowsColumns(lhs, temp_lhs()); + } + tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); + tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); + + switch (x()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape, + getTensorData<float>(lhs_tensor), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h new file mode 100644 index 000000000..744f49795 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H +#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class BatchMatMul : public KernelWithParams<BatchMatMulParams> +{ +public: + BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp, + const BatchMatMulParams ¶ms); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + Tensor *temp_lhs() const { return _outputs[1]; } + Tensor *temp_rhs() const { return _outputs[2]; } +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp new file mode 100644 index 000000000..edfa3a685 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/BatchMatMul.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class BatchMatMulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(BatchMatMulTest, Float) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + 
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6}; + std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = true; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint) +{ + std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = true; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + 
_memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_BatchSizeTwo) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632., + 767., 800., 833., 866.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4})); +} + +TEST_F(BatchMatMulTest, Float_DiffBatch) +{ + std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get()); + Tensor rhs_tensor = + 
makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + kernel.configure(); + _memory_manager->allocate_memory(lhs_scratch); + _memory_manager->allocate_memory(rhs_scratch); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4})); +} + +TEST_F(BatchMatMulTest, Invalid_Shape_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Batch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, 
""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank_NEG) +{ + Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, Invalid_Rank2_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get()); + Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(BatchMatMulTest, TypeMisMatch_NEG) +{ + Tensor lhs_tensor = + makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get()); + Tensor rhs_tensor = + makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor lhs_scratch(DataType::U8, Shape({}), {}, ""); + Tensor 
rhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); + + BatchMatMulParams params; + params.adj_x = false; + params.adj_y = false; + + BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp new file mode 100644 index 000000000..bd315ff7b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
namespace
{
// Only 3D and 4D inputs are supported (i.e. 1 or 2 spatial dimensions).
const int kInputMinDimensionNum = 3;
const int kInputMaxDimensionNum = 4;
} // namespace

BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
                               Tensor *output)
  : Kernel({input, block_shape, crops}, {output})
{
}

// Validates the inputs and computes the output shape:
//   out_batch   = in_batch / prod(block_shape)
//   out_dim[i+1]= in_dim[i+1] * block_shape[i] - crops[i][0] - crops[i][1]
// The channel (last) dimension is passed through unchanged.
//
// NOTE(review): block_shape and crops data are read here, at configure time —
// this assumes both are constant tensors whose data is already available
// before allocation of runtime tensors; confirm against the loader.
void BatchToSpaceND::configure()
{

  const auto *block_shape_data = block_shape()->data<int32_t>();
  const auto *crops_data = crops()->data<int32_t>();
  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());

  // Everything between batch (dim 0) and channels (last dim) is spatial.
  int spatial_dims_num = input()->shape().num_dims() - 2;

  // block_shape must be a 1-D tensor with one factor per spatial dim.
  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);

  // crops must be [spatial_dims_num x 2] with non-negative entries.
  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
  for (int i = 0; i < spatial_dims_num * 2; ++i)
  {
    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
  }

  Shape output_shape = Shape(input()->shape().num_dims());
  int output_batch_size = input()->shape().dim(0);
  for (int i = 0; i < spatial_dims_num; ++i)
  {
    // Batch must be evenly divisible by every block factor.
    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
    output_batch_size = output_batch_size / block_shape_data[i];
    output_shape.dim(i + 1) =
      input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
  }

  output_shape.dim(0) = output_batch_size;
  // Channel dimension is unaffected by the batch-to-space rearrangement.
  output_shape.dim(input()->shape().num_dims() - 1) =
    input()->shape().dim(input()->shape().num_dims() - 1);
  output()->resize(output_shape);
}
BatchToSpaceND::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchToSpaceND( + getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()), + getTensorData<int32_t>(block_shape()), getTensorShape(crops()), + getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::BatchToSpaceND( + getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()), + getTensorData<int32_t>(block_shape()), getTensorShape(crops()), + getTensorData<int32_t>(crops()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h new file mode 100644 index 000000000..57703ea5d --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Kernel implementing CircleBatchToSpaceND: rearranges the batch dimension of
// a 3D/4D tensor back into spatial dimensions according to block_shape, then
// applies crops to each spatial dimension. Inputs: data, block_shape (S32),
// crops (S32); one output.
class BatchToSpaceND : public Kernel
{
public:
  BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
                 Tensor *output);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *block_shape() const { return _inputs[1]; }
  const Tensor *crops() const { return _inputs[2]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};
// Shared test driver: builds input/block_shape/crops tensors of element type T
// (block_shape and crops are always S32), runs the kernel end-to-end, and
// verifies both the produced data and the inferred output shape.
template <typename T>
void Check(std::initializer_list<int32_t> input_shape,
           std::initializer_list<int32_t> block_shape_shape,
           std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
           std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
           std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  constexpr DataType element_type = getElementType<T>();
  Tensor input_tensor =
    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
  Tensor block_shape_tensor =
    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
  Tensor crops_tensor =
    makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(element_type);

  BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
  kernel.configure();
  // Output must be allocated after configure() so its shape is known.
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
}
0}, + /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16}); +} + +TEST(BatchToSpaceNDTest, Invalid_Shape_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(BatchToSpaceNDTest, Invalid_Crops_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h new file mode 100644 index 000000000..2d2842a9e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
//
// Applies the elementwise binary functor `op` to two inputs, broadcasting
// them against each other when their shapes differ (NumPy-style broadcasting,
// up to N=5 dimensions). Fast path: when the shapes are identical the functor
// is applied in a single flat loop with no index arithmetic.
template <typename T, typename Op, int N = 5>
void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
                           const T *input1_data,
                           const tflite::RuntimeShape &unextended_input2_shape,
                           const T *input2_data,
                           const tflite::RuntimeShape &unextended_output_shape, T *output_data,
                           Op op)
{
  if (unextended_input1_shape == unextended_input2_shape)
  {
    // Same shapes: no broadcasting required, iterate the flat buffers.
    const int flat_size = tflite::MatchingElementsSize(
      unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
    for (int i = 0; i < flat_size; ++i)
    {
      output_data[i] = op(input1_data[i], input2_data[i]);
    }
  }
  else
  {
    // Broadcasting path: all shapes must fit in N dims once extended.
    assert(unextended_input1_shape.DimensionsCount() <= N);
    assert(unextended_input2_shape.DimensionsCount() <= N);
    assert(unextended_output_shape.DimensionsCount() <= N);

    // Descriptors carry per-dimension strides; broadcast dims get stride 0 so
    // the same source element is re-read along that axis.
    tflite::NdArrayDesc<N> desc1{};
    tflite::NdArrayDesc<N> desc2{};
    tflite::NdArrayDesc<N> output_desc{};
    tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
                                                &desc1, &desc2);
    tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
                           &output_desc);

    // Visit every output coordinate and map it back into each input through
    // its descriptor.
    auto fn = [&](int indexes[N]) {
      output_data[SubscriptToIndex(output_desc, indexes)] =
        op(input1_data[SubscriptToIndex(desc1, indexes)],
           input2_data[SubscriptToIndex(desc2, indexes)]);
    };
    tflite::NDOpsHelper<N>(output_desc, fn);
  }
}
"${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" + ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp) + +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "${NODE}.h") + list(APPEND SOURCES "${NODE}.cpp") +endmacro(REGISTER_KERNEL) + +include(${KERNEL_REGISTER_FILE}) -list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) +add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) -add_library(luci_interpreter_kernels STATIC ${SOURCES}) -set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) -target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE - "${TensorFlowRuySource_DIR}" - "${TensorFlowGEMMLowpSource_DIR}" - "${TensorFlowEigenSource_DIR}" - "${TensorFlowSource_DIR}") -target_link_libraries(luci_interpreter_kernels - PUBLIC luci_interpreter_core - PRIVATE nncc_common Threads::Threads) +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common) + +add_pal_to_target(${LUCI_INTERPRETER_KERNELS}) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) +macro(REGISTER_KERNEL NODE) + list(APPEND TEST_SOURCES "${NODE}.test.cpp") +endmacro(REGISTER_KERNEL) -set(TEST_SOURCES - Add.test.cpp - ArgMax.test.cpp - AveragePool2D.test.cpp - Concatenation.test.cpp - Conv2D.test.cpp - DepthToSpace.test.cpp - DepthwiseConv2D.test.cpp - Elu.test.cpp - FullyConnected.test.cpp - If.test.cpp - L2Normalize.test.cpp - L2Pool2D.test.cpp - LeakyRelu.test.cpp - 
LocalResponseNormalization.test.cpp - Logistic.test.cpp - MaxPool2D.test.cpp - Mean.test.cpp - Mul.test.cpp - Pad.test.cpp - Reshape.test.cpp - Reverse.test.cpp - Rsqrt.test.cpp - Slice.test.cpp - Softmax.test.cpp - SpaceToDepth.test.cpp - Split.test.cpp - StridedSlice.test.cpp - Sqrt.test.cpp - Squeeze.test.cpp - Tanh.test.cpp - Transpose.test.cpp - TransposeConv.test.cpp - Unpack.test.cpp) +include(${KERNEL_REGISTER_FILE}) list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp) -GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES}) -target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels) +GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS}) diff --git a/compiler/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-interpreter/src/kernels/Cast.cpp new file mode 100644 index 000000000..39ee725dc --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Cast.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Elementwise copy with static_cast from InT to OutT over a raw buffer pair.
template <typename InT, typename OutT>
void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
{
  std::transform(in_data, in_data + elements_count, out_data,
                 [](InT a) { return static_cast<OutT>(a); });
}

// Second half of the double dispatch: the input element type is already
// resolved to InT; switch on the output tensor's element type.
template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
{
  auto const out_type = out_tensor->element_type();
  auto const elements_count = out_tensor->shape().num_elements();

  switch (out_type)
  {
    case loco::DataType::U8:
      cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
      break;
    case loco::DataType::U16:
      cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
      break;
    case loco::DataType::U32:
      cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
      break;
    case loco::DataType::U64:
      cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
      break;
    case loco::DataType::S8:
      cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
      break;
    case loco::DataType::S16:
      cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
      break;
    case loco::DataType::S32:
      cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
      break;
    case loco::DataType::S64:
      cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
      break;
    case loco::DataType::FLOAT32:
      cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
      break;
    case loco::DataType::BOOL:
      cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
      break;
    default:
      throw std::runtime_error("Unsupported output type.");
  }
}

// First half of the double dispatch: switch on the input tensor's element
// type, then forward to cast_from_pointer_to_tensor for the output type.
void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
{
  auto in_type = in_tensor->element_type();

  switch (in_type)
  {
    case loco::DataType::U8:
      cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::U16:
      cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::U32:
      cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::U64:
      cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::S8:
      cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::S16:
      cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::S32:
      cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::S64:
      cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
      break;
    case loco::DataType::FLOAT32:
      cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
      break;
    case loco::DataType::BOOL:
      cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
      break;
    default:
      throw std::runtime_error("Unsupported input type.");
  }
}

} // namespace

namespace luci_interpreter
{
namespace kernels
{

Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}

// Validates that both element types are known and shapes the output like the
// input; any known-to-known type combination is accepted here and dispatched
// at execute() time.
void Cast::configure()
{
  LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
  LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);

  const Shape &shape = input()->shape();
  output()->resize(shape);
}

// Performs the elementwise type conversion.
void Cast::execute() const
{
  // configure() resized output to input's shape, so the element counts match.
  assert(input()->shape().num_elements() == output()->shape().num_elements());

  cast_from_tensor_to_tensor(input(), output());
}
000000000..f0bd02037 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Cast.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_CAST_H +#define LUCI_INTERPRETER_KERNELS_CAST_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Cast : public Kernel +{ +public: + Cast(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CAST_H diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp new file mode 100644 index 000000000..4713ad34c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Cast.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T1, typename T2> +void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data, + std::initializer_list<T2> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType input_type = getElementType<T1>(); + constexpr DataType output_type = getElementType<T2>(); + + Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + Cast kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), shape); +} + +template <typename T> +void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data, + std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType input_type = loco::DataType::BOOL; + constexpr DataType output_type = getElementType<T>(); + std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted; + for (auto elem : input_data) + { + input_data_converted.push_back(elem); + } + + Tensor 
input_tensor = + makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + Cast kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), shape); +} + +template <typename T> class CastTest : public ::testing::Test +{ +}; + +using IntDataTypes = + ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>; +TYPED_TEST_SUITE(CastTest, IntDataTypes); + +TYPED_TEST(CastTest, FloatToInt) +{ + Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 9.0f, 7.0f, 3.0f, // + }, + /*output_data=*/ + { + 1, 9, 7, 3, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToFloat) +{ + Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1.0f, 9.0f, 7.0f, 3.0f, // + }); + SUCCEED(); +} + +template <typename T1, typename T2> void check_int() +{ + Check<T1, T2>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1, 9, 7, 3, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToInt) +{ + check_int<TypeParam, uint8_t>(); + check_int<TypeParam, uint16_t>(); + check_int<TypeParam, uint32_t>(); + check_int<TypeParam, uint64_t>(); + check_int<TypeParam, int8_t>(); + check_int<TypeParam, int16_t>(); + check_int<TypeParam, int32_t>(); + check_int<TypeParam, int64_t>(); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToBool) +{ + Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 0, 7, 0, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, BoolToInt) +{ + CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + 
/*output_data=*/ + { + 1, 0, 0, 1, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToBool) +{ + Check<float, bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TEST(CastTest, BoolToFloat) +{ + CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + /*output_data=*/ + { + 1.0f, 0.0f, 0.0f, 1.0f, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToFloat) +{ + Check<float, float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }); + SUCCEED(); +} + +TEST(CastTest, BoolToBool) +{ + CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, true, false, false, // + }, + /*output_data=*/ + { + true, true, false, false, // + }); + SUCCEED(); +} + +TEST(CastTest, UnsupportedType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::Unknown); + + Cast kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); + SUCCEED(); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp index 812ab7609..46ee5941e 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp @@ -18,7 +18,7 @@ #include "kernels/Concatenation.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/concatenation.h> #include <stdexcept> @@ -29,27 +29,30 @@ namespace kernels 
Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output, const ConcatenationParams ¶ms) - : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params) + : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params) { } void Concatenation::configure() { const int num_inputs = _inputs.size(); - assert(num_inputs > 0); + LUCI_INTERPRETER_CHECK(num_inputs > 0); const Tensor *t0 = _inputs[0]; + // TODO: Support concat with fused activation function + LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE); + int axis = _params.axis; if (axis < 0) axis += t0->shape().num_dims(); - assert(axis >= 0 && axis < t0->shape().num_dims()); + LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims()); int32_t sum_axis = t0->shape().dim(axis); for (int i = 1; i < num_inputs; ++i) { const Tensor *tensor = _inputs[i]; - assert(tensor->element_type() == t0->element_type()); - assert(tensor->shape().num_dims() == t0->shape().num_dims()); + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); for (int d = 0; d < t0->shape().num_dims(); ++d) { if (d == axis) @@ -58,7 +61,7 @@ void Concatenation::configure() } else { - assert(tensor->shape().dim(d) == t0->shape().dim(d)); + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); } } } @@ -66,11 +69,21 @@ void Concatenation::configure() Shape output_shape = t0->shape(); output_shape.dim(axis) = sum_axis; - // TODO S8 type needs more checking: quantization parameters of all input tensors and the output - // tensor should be the same. Note that there is no such requirement for U8 type. 
- if (t0->element_type() == DataType::S8) - throw std::runtime_error("Unsupported type."); + // If input tensors are INT8 type then quantization parameters of all input tensors and the output + // should be the same + for (auto current_tensor : _inputs) + { + if (current_tensor->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() == + output()->quantized_dimension()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() == + current_tensor->scales().size()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points()); + LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales()); + } + } output()->resize(output_shape); } diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp index d9a7097d0..f893b38fd 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Concatenation.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,58 +27,242 @@ namespace using namespace testing; -TEST(ConcatenationTest, Float) +class ConcatenationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ConcatenationTest, Float) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); 
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; // Try different 'axis' and expect different results. { params.axis = 0; + params.activation = luci::FusedActFunc::NONE; Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + for (auto t : kernel.getOutputTensors()) + { + _memory_manager->allocate_memory(*t); + } kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); } { params.axis = -2; // Same as '0'. + params.activation = luci::FusedActFunc::NONE; Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); } { params.axis = 1; + params.activation = luci::FusedActFunc::NONE; Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); } { params.axis = -1; // Same as '1'. 
+ params.activation = luci::FusedActFunc::NONE; Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); } } +TEST_F(ConcatenationTest, Input_Number_Check_NEG) +{ + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Invalid_Axis_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -3; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, 
&output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG) +{ + std::vector<uint8_t> input1_data{1, 2, 3, 4}; + std::vector<int8_t> input2_data{5, 6, 7, 8}; + Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation 
kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + int quantized_dimension = 3; + std::vector<float> scales{0.1, 0.2, 0.3}; + std::vector<int32_t> zero_points{1, -1, 1}; + + Tensor input1_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4}; + std::vector<float> input2_data{5, 6, 7, 8}; + float scale = 0.1; + int32_t zero_point_1 = 1; + int32_t zero_point_2 = -1; + + Tensor input1_tensor = + makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +// TODO: Remove this test when concat w/ fused_activation is supported +TEST_F(ConcatenationTest, With_Fused_Activation_NEG) +{ + std::vector<float> input1_data{1, 
2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = 1; + params.activation = luci::FusedActFunc::RELU; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index a51fb4afc..234f95425 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include "PALConv2d.h" #include <stdexcept> #include <thread> @@ -29,8 +30,8 @@ namespace kernels { Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const Conv2DParams ¶ms) - : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params) + Tensor *scratchpad, const Conv2DParams ¶ms) + : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params) { } @@ -44,7 +45,11 @@ void Conv2D::configure() // (3) | uint8 uint8 int32 uint8 | quantized // (4) | int8 int8 int32 int8 | quantized per channel // - // We only support (1) and (3) for now. 
+ // We only support (1), (3) and (4) for now, and additionally the following: + // | input filter bias output | + // ----+---------------------------+ + // (5) | int16 int16 int64 int16 | + // if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); @@ -53,6 +58,21 @@ void Conv2D::configure() { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + } + else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); + } else { throw std::runtime_error("Unsupported type."); @@ -75,11 +95,11 @@ void Conv2D::configure() bias()->shape().dim(0) == output_depth)); const int32_t output_height = - computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, - _params.dilation_height_factor); + computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, + _params.dilation_height_factor); const int32_t output_width = - computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, - _params.dilation_width_factor); + computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, + _params.dilation_width_factor); _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor, input_height, filter_height, output_height); @@ -88,20 +108,28 
@@ void Conv2D::configure() output()->resize({batches, output_height, output_width, output_depth}); - // Allocate tensor for Im2Col, if needed. - // The checks here should be aligned with the actual implementation. - const bool need_dilated_im2col = - _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1; - const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 || - filter_height != 1 || filter_width != 1; - const bool need_im2col = need_dilated_im2col || need_non_dilated_im2col; - if (need_im2col) + // Allocate tensor for scratchpad, if needed. + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); + + switch (_params.activation) { - const int input_depth = input_shape.dim(3); - Shape im2col_shape{batches, output_height, output_width, - input_depth * filter_height * filter_width}; - _im2col = - std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, ""); + case Activation::NONE: + case Activation::RELU: + case Activation::RELU6: + case Activation::RELU_N1_TO_1: + break; + default: + throw std::runtime_error("Unsupported fused activation"); } } @@ -117,7 +145,23 @@ void Conv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + 
LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } + break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; + case DataType::S16: + evalQuantizedS16(); break; default: throw std::runtime_error("Unsupported type."); @@ -140,11 +184,16 @@ void Conv2D::evalFloat() const params.float_activation_min = activation_min; params.float_activation_max = activation_max; - tflite::optimized_ops::Conv(params, getTensorShape(input()), getTensorData<float>(input()), - getTensorShape(filter()), getTensorData<float>(filter()), - getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), - getTensorShape(_im2col.get()), getTensorData<float>(_im2col.get())); + auto scratchpad = getOutputTensors()[1]; + float *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<float>(); + + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(filter()), getTensorData<float>(filter()), + getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output()), + getTensorShape(scratchpad), scratchpad_data); } void Conv2D::evalQuantized() const @@ -178,16 +227,229 @@ void Conv2D::evalQuantized() const params.quantized_activation_min = activation_min; params.quantized_activation_max = activation_max; - // TODO This should only be done once (although it takes only a few microseconds). - // Also, the user should be able to adjust the number of threads. 
- auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>(); - gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency())); + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(filter()), getTensorData<uint8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output()), + getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad)); +} - tflite::optimized_ops::Conv( - params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), - getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), - getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()), - getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get()); +void Conv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = 
_params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector<ChannelQuantMultipliers> multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int32_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift); + + scaled_acc += 
output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void Conv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::ConvPerChannel( + params, 
multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector<ChannelQuantMultipliers> multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < 
batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int64_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, multipliers[out_c].multiplier, multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 69e309852..330bf3a2a 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams> { public: Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const Conv2DParams ¶ms); + Tensor *scratchpad, const Conv2DParams ¶ms); const Tensor 
*input() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } @@ -44,9 +44,11 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _im2col; int32_t _padding_height{}; int32_t _padding_width{}; }; diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 0446d9760..0fe6ef795 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Conv2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,27 +27,39 @@ namespace using namespace testing; -TEST(Conv2DTest, Float) +class Conv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Conv2DTest, Float) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; Shape bias_shape{2}; std::vector<float> input_data{ - 1, 2, 3, 4, 5, 6, // row = 0 - 7, 8, 9, 10, 11, 12, // row = 1 - 13, 14, 15, 16, 17, 18, // row = 2 - 19, 20, 21, 22, 23, 24, // row = 3 + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 }; std::vector<float> filter_data{ - 1, 2, -3, -4, // out = 0, row = 0 - -5, 6, -7, 8, // out = 1, row = 0 - 4, -2, 3, -1, // out = 0, row = 1 - -8, -6, 7, 5, // out = 1, row = 1 + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor 
= makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -57,42 +70,95 @@ TEST(Conv2DTest, Float) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 11, 16, 7, 20, // row = 0 - 0, 40, 0, 44, // row = 1 + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 }; std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, FloatCheck) +TEST_F(Conv2DTest, FloatPointwise) +{ + Shape input_shape{1, 2, 2, 2}; + Shape filter_shape{2, 1, 1, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, // row = 0, col = 0 + 3, 4, // row = 0, col = 1 + 5, 6, // row = 1, col = 0 + 7, 8, // row = 1, col = 1 + }; + std::vector<float> filter_data{ + -1, 2, // out = 0 + -3, 4, // out = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 4, 7, 6, 9, // row = 0 + 8, 11, 10, 13, // row = 1 + }; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatCheck) { Shape input_shape{2, 2, 4, 1}; Shape filter_shape{3, 2, 2, 1}; Shape bias_shape{3}; std::vector<float> input_data{ - // First batch - 1, 1, 1, 1, // row = 1 - 2, 2, 2, 2, // row = 2 - // Second batch - 1, 2, 3, 4, // row = 1 - 1, 2, 3, 4, // row = 2 + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 }; std::vector<float> filter_data{ - 1, 2, 3, 4, // first 2x2 filter - -1, 1, -1, 1, // second 2x2 filter - -1, -1, 1, 1, // third 2x2 filter + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter }; std::vector<float> bias_data{1, 2, 3}; - Tensor input_tensor = 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -103,57 +169,130 @@ TEST(Conv2DTest, FloatCheck) params.dilation_width_factor = 1; params.activation = Activation::NONE; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); std::vector<float> ref_output_data{ - 18, 2, 5, // first batch, left - 18, 2, 5, // first batch, right - 17, 4, 3, // second batch, left - 37, 4, 3, // second batch, right + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right }; std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, Uint8) +TEST_F(Conv2DTest, Uint8) { + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 
3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Shape bias_shape = {3}; - Tensor input_tensor{ - DataType::U8, {2, 2, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor filter_tensor{ - DataType::U8, {3, 2, 2, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor bias_tensor{ - DataType::S32, bias_shape, {{input_quant_param.first * input_quant_param.first}, {0}}, ""}; + + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 
37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, Uint8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, 
zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); Tensor output_tensor = - makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - std::vector<uint8_t> quantized_input = quantize<uint8_t>( - { - // First batch - 1, 1, 1, 1, // row = 1 - 2, 2, 2, 2, // row = 2 - // Second batch - 1, 2, 3, 4, // row = 1 - 1, 2, 3, 4, // row = 2 - }, - input_quant_param.first, input_quant_param.second); - std::vector<uint8_t> quantized_filter = quantize<uint8_t>( - { - 1, 2, 3, 4, // first 2x2 filter - -1, 1, -1, 1, // second 2x2 filter - -1, -1, 1, 1, // third 2x2 filter - }, - input_quant_param.first, input_quant_param.second); - std::vector<int32_t> bias_data = - quantize<int32_t>({1, 2, 3}, input_quant_param.first * input_quant_param.first, 0); - input_tensor.writeData(quantized_input.data(), quantized_input.size() * sizeof(uint8_t)); - filter_tensor.writeData(quantized_filter.data(), quantized_filter.size() * sizeof(uint8_t)); - bias_tensor.writeData(bias_data.data(), bias_data.size() * sizeof(int32_t)); + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); Conv2DParams params{}; params.padding = Padding::VALID; @@ -163,44 +302,237 @@ TEST(Conv2DTest, Uint8) params.dilation_width_factor = 1; params.activation = Activation::NONE; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); std::vector<float> ref_output_data{ - 18, 2, 5, // first batch, left - 18, 2, 5, // first batch, right - 17, 4, 3, // second batch, left - 37, 4, 3, // second batch, right + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right }; std::vector<int32_t> 
ref_output_shape{2, 1, 2, 3}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_quant_param.first, output_quant_param.second), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, Unsupported_Type_Configure_NEG) +TEST_F(Conv2DTest, SInt8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = 
makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::S8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, SInt16) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + std::vector<float> ref_output_data{ + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 + }; + + Tensor input_tensor = + 
makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(Conv2DTest, SInt16_CWQ_weights) +{ + Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C + Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels + Shape bias_shape{3}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 3}; + + std::vector<float> input_data{ + 1, 2, // row = 0, col 0 + 3, 4, // row = 0, col 1 + 5, 6, // row = 1, col 0 + 7, 8, // row = 1, col 1 + }; + std::vector<float> filter_data{ + 4, -3, // out = 0 + 1, -3, // out = 1 + 5, -3, // out = 2 + }; + std::vector<float> bias_data{1, 10, 5}; + std::vector<float> ref_output_data{ + 0, 5, 4, // row 0, col 0 + 1, 1, 8, // row 0, col 1 + 3, 0, 12, // row 1, col 0 + 5, 0, 16, // row 1, col 1 + }; + + float input_scale = 0.25f; + float output_scale = 0.05f; + std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f}; + std::vector<float> bias_scales; + for (int i = 0; i < filter_scales.size(); ++i) + 
bias_scales.push_back(filter_scales[i] * input_scale); + std::vector<int32_t> zerop = {0, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; Shape bias_shape{2}; std::vector<int32_t> input_data{ - 1, 2, 3, 4, 5, 6, // row = 0 - 7, 8, 9, 10, 11, 12, // row = 1 - 13, 14, 15, 16, 17, 18, // row = 2 - 19, 20, 21, 22, 23, 24, // row = 3 + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 }; std::vector<float> filter_data{ - 1, 2, -3, -4, // out = 0, row = 0 - -5, 6, -7, 8, // out = 1, row = 0 - 4, -2, 3, -1, // out = 0, row = 1 - -8, -6, 7, 5, // out = 1, row = 1 + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + 
-8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -211,31 +543,34 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Bias_Type_NEG) +TEST_F(Conv2DTest, Invalid_Bias_Type_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; Shape bias_shape{2}; std::vector<float> input_data{ - 1, 2, 3, 4, 5, 6, // row = 0 - 7, 8, 9, 10, 11, 12, // row = 1 - 13, 14, 15, 16, 17, 18, // row = 2 - 19, 20, 21, 22, 23, 24, // row = 3 + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 }; std::vector<float> filter_data{ - 1, 2, -3, -4, // out = 0, row = 0 - -5, 6, -7, 8, // out = 1, row = 0 - 4, -2, 3, -1, // out = 0, row = 1 - -8, -6, 7, 5, // out = 1, row = 1 + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<uint8_t> bias_data{1, 2}; - 
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -246,31 +581,35 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Bias_Data_NEG) +TEST_F(Conv2DTest, Invalid_Bias_Data_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; Shape bias_shape{3}; std::vector<float> input_data{ - 1, 2, 3, 4, 5, 6, // row = 0 - 7, 8, 9, 10, 11, 12, // row = 1 - 13, 14, 15, 16, 17, 18, // row = 2 - 19, 20, 21, 22, 23, 24, // row = 3 + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 }; std::vector<float> filter_data{ - 1, 2, -3, -4, // out = 0, row = 0 - -5, 6, -7, 8, // out = 1, row = 0 - 4, -2, 3, -1, // out = 0, row = 1 - -8, -6, 7, 5, // out = 1, row = 1 + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2, 3}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - 
Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -281,31 +620,35 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Input_Shape_NEG) +TEST_F(Conv2DTest, Invalid_Input_Shape_NEG) { Shape input_shape{1, 4, 6, 1}; Shape filter_shape{2, 2, 2, 2}; Shape bias_shape{2}; std::vector<float> input_data{ - 1, 2, 3, 4, 5, 6, // row = 0 - 7, 8, 9, 10, 11, 12, // row = 1 - 13, 14, 15, 16, 17, 18, // row = 2 - 19, 20, 21, 22, 23, 24, // row = 3 + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 }; std::vector<float> filter_data{ - 1, 2, -3, -4, // out = 0, row = 0 - -5, 6, -7, 8, // out = 1, row = 0 - 4, -2, 3, -1, // out = 0, row = 1 - -8, -6, 7, 5, // out = 1, row = 1 + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, 
filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -316,7 +659,46 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 
2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::TANH; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp index cab63e26d..3a9acd1d4 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp @@ -16,7 +16,7 @@ #include "DepthToSpace.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALDepthToSpace.h" namespace luci_interpreter { @@ -24,26 +24,16 @@ namespace kernels { DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams ¶ms) - : KernelWithParams<DepthToSpaceParams>({input}, {output}, params) + : KernelWithParams<DepthToSpaceParams>({input}, {output}, params) { } void DepthToSpace::configure() { - if (input()->shape().num_dims() != 4) - { - throw std::runtime_error("Invalid input num_dims."); - } - if (output()->element_type() != DataType::FLOAT32 && output()->element_type() != DataType::U8 && - output()->element_type() != DataType::S8 && output()->element_type() != DataType::S32 && - output()->element_type() != DataType::S64) - { - throw std::runtime_error("Invalid output type"); - } - if (input()->element_type() != output()->element_type()) - { - throw std::runtime_error("Type mismatch on input and output."); - } + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8) + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()) const int block_size = params().block_size; const int32_t input_height = input()->shape().dim(1); const int32_t input_width = input()->shape().dim(2); 
@@ -52,9 +42,9 @@ void DepthToSpace::configure() int32_t output_width = input_width * block_size; int32_t output_channels = input_channels / block_size / block_size; - assert(input_height == output_height / block_size); - assert(input_width == output_width / block_size); - assert(input_channels == output_channels * block_size * block_size); + LUCI_INTERPRETER_CHECK(input_height == output_height / block_size); + LUCI_INTERPRETER_CHECK(input_width == output_width / block_size); + LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size); Shape output_shape(4); output_shape.dim(0) = input()->shape().dim(0); @@ -72,14 +62,14 @@ void DepthToSpace::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); break; default: throw std::runtime_error("Unsupported Type."); diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp index 1b805702d..88e6e07f1 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp @@ -16,6 +16,7 @@ #include "kernels/DepthToSpace.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,16 +32,18 
@@ template <typename T> class DepthToSpaceTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(DepthToSpaceTest, DataTypes); +TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes); TYPED_TEST(DepthToSpaceTest, SimpleCase) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8}; Shape input_shape{1, 1, 2, 4}; std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8}; std::vector<int32_t> output_shape{1, 2, 4, 1}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); DepthToSpaceParams params{}; @@ -48,6 +51,7 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase) DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), @@ -55,6 +59,57 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } +TEST(DepthToSpaceTest, InvalidInputShape_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InOutTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = 
std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InvalidBlockSize_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 3; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index b01a5e086..c554c309d 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -18,8 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> -#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include "PALDepthwiseConv2d.h" #include <stdexcept> @@ -29,8 +28,9 @@ namespace kernels { DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output, const DepthwiseConv2DParams ¶ms) - : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params) + Tensor *output, Tensor *scratchpad, + const 
DepthwiseConv2DParams ¶ms) + : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params) { } @@ -45,43 +45,62 @@ void DepthwiseConv2D::configure() // (4) | int8 int8 int32 int8 | quantized per channel // (5) | int16 int8 int64 int16 | quantized per channel 16x8 // - // We only support (1) and (3) for now. + // We only support (1), (3) and (4) for now, and additionally the following: + // | input filter bias output | + // ----+---------------------------+ + // (5) | int16 int16 int64 int16 | + // if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) { - assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); } else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) { - assert(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) == + filter()->scales().size()); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); } else { throw std::runtime_error("Unsupported type."); } - assert(output()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); const Shape &input_shape = input()->shape(); const Shape &filter_shape = filter()->shape(); 
- assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); const int32_t batches = input_shape.dim(0); const int32_t input_height = input_shape.dim(1); const int32_t input_width = input_shape.dim(2); // Filter format: [1, H, W, O]. - assert(filter_shape.dim(0) == 1); + LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1); const int32_t filter_height = filter_shape.dim(1); const int32_t filter_width = filter_shape.dim(2); const int32_t channels_out = filter_shape.dim(3); - assert(bias() == nullptr || - (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == channels_out)); + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + bias()->shape().dim(0) == channels_out)); const int32_t output_height = - computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, - _params.dilation_height_factor); + computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, + _params.dilation_height_factor); const int32_t output_width = - computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, - _params.dilation_width_factor); + computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, + _params.dilation_width_factor); _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor, input_height, filter_height, output_height); @@ -89,6 +108,16 @@ void DepthwiseConv2D::configure() filter_width, output_width); output()->resize({batches, output_height, output_width, channels_out}); + + tflite::DepthwiseParams params{}; + + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(), + getTensorShape(input()), getTensorShape(filter()), 
+ getTensorShape(output())); } void DepthwiseConv2D::execute() const @@ -103,7 +132,23 @@ void DepthwiseConv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(3))); + evalQuantizedPerChannel(); + } + break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; + case DataType::S16: + evalQuantizedS16(); break; default: throw std::runtime_error("Unsupported type."); @@ -128,9 +173,100 @@ void DepthwiseConv2D::evalFloat() const params.float_activation_max = activation_max; tflite::reference_ops::DepthwiseConv( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output())); + params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), + getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void DepthwiseConv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t 
filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + for (int m = 0; m < depth_multiplier; ++m) + { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - _padding_width; + const int in_y_origin = (out_y * stride_height) - _padding_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. 
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + if (is_point_inside_image) + { + int32 input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)]; + int32 filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)]; + acc += (filter_val - filter()->zero_points()[output_channel]) * + (input_val - input()->zero_point()); + } + } + } + if (bias_data) + { + acc += bias_data[output_channel]; + } + int32_t output_multiplier = quant_multipliers[output_channel].multiplier; + int output_shift = quant_multipliers[output_channel].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] = + static_cast<uint8_t>(scaled_acc); + } + } + } + } + } } void DepthwiseConv2D::evalQuantized() const @@ -166,9 +302,149 @@ void DepthwiseConv2D::evalQuantized() const params.quantized_activation_max = activation_max; tflite::reference_ops::DepthwiseConv( - params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), - getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), - getTensorShape(output()), getTensorData<uint8_t>(output())); + params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), + getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void DepthwiseConv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::DepthwiseParams params{}; + 
+ params.padding_type = tflite::PaddingType::kSame; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.depth_multiplier = _params.depth_multiplier; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = 0; + params.output_offset = output()->zero_point(); + params.output_multiplier = 1; // unused in tflite code + params.output_shift = 0; // unused in tflite code + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + 
getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void DepthwiseConv2D::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + for 
(int32_t m = 0; m < depth_multiplier; ++m) + { + const int32_t out_c = m + in_c * depth_multiplier; + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int64_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)]; + acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + if (bias_data != nullptr) + { + acc += bias_data[out_c]; + } + + int32_t output_multiplier = quant_multipliers[out_c].multiplier; + int output_shift = quant_multipliers[out_c].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } + } } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h index 62f4bff0e..3d1faf6c1 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams> { public: DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const DepthwiseConv2DParams ¶ms); + Tensor *scratchpad, const DepthwiseConv2DParams 
¶ms); const Tensor *input() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } @@ -42,6 +42,9 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index a9b43d864..6b4673f3e 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/DepthwiseConv2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,27 +27,39 @@ namespace using namespace testing; -TEST(DepthwiseConv2DTest, Float) +class DepthwiseConv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DepthwiseConv2DTest, Float) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 2, 4}; Shape bias_shape{4}; std::vector<float> input_data{ - 1, 2, 7, 8, // - 3, 4, 9, 10, // - 5, 6, 11, 12, // - 13, 14, 15, 16, // + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // }; std::vector<float> filter_data{ - 1, 2, 3, 4, // - -9, 10, -11, 12, // - 5, 6, 7, 8, // - 13, -14, 15, -16, // + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, 
input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -58,54 +71,50 @@ TEST(DepthwiseConv2DTest, Float) params.dilation_width_factor = 1; params.activation = Activation::RELU; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 71, 0, 99, 0, // - 167, 0, 227, 28, // + 71, 0, 99, 0, // + 167, 0, 227, 28, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } -TEST(DepthwiseConv2DTest, Uint8) +TEST_F(DepthwiseConv2DTest, Uint8) { + std::vector<float> input_data{ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Tensor input_tensor{ - DataType::U8, {1, 3, 2, 2}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor filter_tensor{ - DataType::U8, {1, 2, 2, 4}, 
{{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor bias_tensor{ - DataType::S32, {4}, {{input_quant_param.first * input_quant_param.first}, {0}}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); Tensor output_tensor = - makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 1, 2, 7, 8, // column 1 - 3, 4, 9, 10, // column 2 - 5, 6, 11, 12, // column 3 - }, - input_quant_param.first, input_quant_param.second); - std::vector<uint8_t> quant_filter = quantize<uint8_t>( - { - 1, 2, 3, 4, // - -9, 10, -11, 12, // - 5, 6, 7, 8, // - 13, -14, 15, -16, // - }, - input_quant_param.first, input_quant_param.second); - std::vector<int32_t> quant_bias = - quantize<int32_t>({1, 2, 3, 4}, input_quant_param.first * input_quant_param.first, 0); - - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); - filter_tensor.writeData(quant_filter.data(), quant_filter.size() * sizeof(uint8_t)); - bias_tensor.writeData(quant_bias.data(), quant_bias.size() * sizeof(int32_t)); + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); DepthwiseConv2DParams params{}; params.padding = Padding::VALID; @@ -116,20 +125,498 @@ TEST(DepthwiseConv2DTest, Uint8) params.dilation_width_factor = 1; params.activation = Activation::NONE; - DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + DepthwiseConv2D 
kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); kernel.execute(); std::vector<float> ref_output_data{ - 71, -34, 99, -20, // - 91, -26, 127, -4, // + 71, -34, 99, -20, // + 91, -26, 127, -4, // }; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } +TEST_F(DepthwiseConv2DTest, SInt16) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, 4}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S64, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = 
Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + float input_scale = 0.25; + std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f}; + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_scales[i] * input_scale); + std::vector<int32_t> zerop(4, 0); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + 
params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + 
bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> 
ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(1, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<int32_t> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + 
}; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{4, 2, 2}; + Shape filter_shape{2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + 
params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{2, 1, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 4, 2}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp new file mode 100644 index 000000000..96399e5c7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Dequantize.h" +#include "kernels/Utils.h" +#include "PALDequantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Dequantize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 || + input()->element_type() == loco::DataType::U8 || + input()->element_type() == loco::DataType::S16); + + LUCI_INTERPRETER_CHECK(input()->scales().size() == 1); + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + + output()->resize(input()->shape()); +} + +void Dequantize::execute() const +{ + tflite::DequantizationParams op_params; + op_params.zero_point = input()->zero_point(); + op_params.scale = input()->scale(); + + switch (input()->element_type()) + { + case loco::DataType::U8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int16_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h new file mode 100644 index 000000000..5565df0e4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Dequantize : public Kernel +{ +public: + Dequantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp new file mode 100644 index 000000000..0cab633d6 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Dequantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DequantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DequantizeTest, Uint8) +{ + std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint8) +{ + std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint16) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, InvalidInputType_NEG) +{ + std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidOutputType_NEG) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp new file mode 100644 index 000000000..dd1532278 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/div.h> +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms) + : KernelWithParams<DivParams>({input1, input2}, {output}, params) +{ +} + +void Div::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Div::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Div::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Div::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + 
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Div::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_output_multiplier = input1_scale / (input2_scale * output_scale); + + int32_t output_multiplier{}; + int output_shift{}; + + quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input2_offset = -input2()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h new file mode 100644 index 000000000..c1bf3e10b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DIV_H +#define LUCI_INTERPRETER_KERNELS_DIV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Div : public KernelWithParams<DivParams> +{ +public: + Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp new file mode 100644 index 000000000..85cd8b90a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +float GetTolerance(float min, float max) +{ + const float kQuantizedStep = (max - min) / 255.0f; + const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST_F(DivTest, Float) +{ + Shape base_shape = {2, 3, 1, 1}; + + std::vector<int32_t> output_shape = {2, 3, 1, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; + std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST_F(DivTest, FloatBroadcast) +{ + Shape input1_shape = {1, 3}; + Shape input2_shape = {3, 1}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f}; + std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; + + Tensor 
input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(DivTest, Uint8) +{ + Shape base_shape = {1, 2, 2, 1}; + + std::vector<int32_t> output_shape = {1, 2, 2, 1}; + + std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; + std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; + std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f); + + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); + + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(test_outputs, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager 
*memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, // + 10, 0, 4, 5, 20, 0, // + 0, 0, 0, 2, 0, 0, // + 2, 0, 1, 10, 5, 0, // + 2, 3, 1, 0, 5, 1, // + 18, 20, 7, 0, 37, 10}, + {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, + {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, + 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, + {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; + std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; + std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(DivTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, 
&output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DivTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(DivTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp index 5de4a1f3b..697d63be4 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.cpp @@ -17,7 +17,7 @@ #include "kernels/Elu.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALElu.h" #include <stdexcept> @@ -31,7 +31,7 @@ Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} void Elu::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); output()->resize(input()->shape()); } @@ -40,8 +40,8 @@ void Elu::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - 
tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp index 52444cbea..814499cdb 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Elu.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,34 +30,50 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); - + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Elu kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); (void)output_shape; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); } TEST(EluTest, SimpleElu) { Check( - /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, - /*input_data=*/ - { - 0, -6, 2, -4, // - 3, -2, 10, -0.1, // - }, - /*output_data=*/ - { - 0.0, -0.997521, 2.0, -0.981684, // - 3.0, 
-0.864665, 10.0, -0.0951626, // - }); + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }, + /*output_data=*/ + { + 0.0, -0.997521, 2.0, -0.981684, // + 3.0, -0.864665, 10.0, -0.0951626, // + }); +} + +TEST(EluTest, InOutTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Elu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp new file mode 100644 index 000000000..a57e127b7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Equal.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Equal::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Equal::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Equal::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void Equal::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams 
op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Equal::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h new file mode 100644 index 000000000..c9be32cc0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Equal : public Kernel +{ +public: + Equal(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp new file mode 100644 index 000000000..5870e5460 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Equal.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class EqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(EqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + false, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(EqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> 
ref_output_data{ + false, true, false, // Row 1 + false, false, false, // Row 2 + false, false, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = 
std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + false, true, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(EqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(EqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(EqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, false, // Row 1 + false, true, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, false, false, false, // Row 1 + false, false, true, false, // Row 2 + false, false, false, false, // Row 3 + true, true, true, true, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = 
makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal 
kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-interpreter/src/kernels/Exp.cpp new file mode 100644 index 000000000..e7c560a88 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Exp.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Exp.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/exp.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Exp::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Exp::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Exp::evalFloat() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Exp.h b/compiler/luci-interpreter/src/kernels/Exp.h new file mode 100644 index 000000000..429177375 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Exp.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXP_H +#define LUCI_INTERPRETER_KERNELS_EXP_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Exp : public Kernel +{ +public: + Exp(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXP_H diff --git a/compiler/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-interpreter/src/kernels/Exp.test.cpp new file mode 100644 index 000000000..a159d9db9 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Exp.test.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Exp.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(ExpTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{1, 1, 7}; + std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Exp kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{1, 1, 7}; + std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f), + std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f), + std::exp(-0.01f)}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp new file mode 100644 index 000000000..ba35c99fa --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output) + : Kernel({input, axis}, {output}) +{ +} + +void ExpandDims::configure() +{ + int32_t axis_value; + + switch (axis()->element_type()) + { + case loco::DataType::S32: + axis_value = *getTensorData<int32_t>(axis()); + break; + case loco::DataType::S64: + axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis())); + break; + default: + throw std::runtime_error("Unsupported type."); + } + + const auto input_shape = input()->shape(); + + if (axis_value < 0) + { + axis_value += input_shape.num_dims() + 1; + } + + LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0); + + Shape output_shape(input_shape.num_dims() + 1); + for (int32_t i = 0; i < output_shape.num_dims(); ++i) + { + if (i < axis_value) + { + output_shape.dim(i) = input_shape.dim(i); + } + else if (i == axis_value) + { + output_shape.dim(i) = 1; + } + else + { + LUCI_INTERPRETER_CHECK(i >= 1); + output_shape.dim(i) = input_shape.dim(i - 1); + } + } + + output()->resize(output_shape); +} + +void ExpandDims::execute() const +{ + // Just copy input to output + const auto *input_data = input()->data<void>(); + auto *output_data = output()->data<void>(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} 
+ +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h new file mode 100644 index 000000000..e510b1160 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H +#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ExpandDims : public Kernel +{ +public: + ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axis() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp new file mode 100644 index 000000000..df9eaccc0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ExpandDimsTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ExpandDimsTest, PositiveAxis) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2})); +} + +TEST_F(ExpandDimsTest, NegAxis) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + 
std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {-1}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1})); +} + +TEST_F(ExpandDimsTest, InvalidAxisType_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<float> axis_value = {1.0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ExpandDimsTest, InvalidAxisValue_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {3}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp new file mode 100644 index 000000000..e09d6331a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/Utils.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output) + : Kernel({dims, value}, {output}) +{ +} + +template <typename T> void Fill::configureShape() +{ + const auto dims_data = getTensorData<T>(dims()); + Shape output_shape(dims()->shape().dim(0)); + + for (int i = 0; i < output_shape.num_dims(); ++i) + { + T data = dims_data[i]; + if (data < 0) + throw std::runtime_error("Fill dimensions must be >= 0"); + + output_shape.dim(i) = data; + } + + output()->resize(output_shape); +} + +void Fill::configure() +{ + const auto dims_shape = dims()->shape(); + const auto value_shape = value()->shape(); + + // Make sure the 1st input tensor is 1-D + LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1); + + // Make sure the 1st input tensor is int32 or int64 + LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or + dims()->element_type() == DataType::S64); + + // Make sure the 2nd input tensor is a scalar + 
LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0) + + // Check zero point and scale for S16 and S8 + if (value()->element_type() == loco::DataType::S16 or + value()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale()); + LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point()); + + if (value()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(value()->zero_point() == 0); + } + // Resize output + switch (dims()->element_type()) + { + case DataType::S32: + configureShape<int32_t>(); + break; + case DataType::S64: + configureShape<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Fill::execute() const +{ + switch (output()->element_type()) + { + case DataType::S8: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()), + getTensorShape(output()), getTensorData<int8_t>(output())); + break; + case DataType::S16: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()), + getTensorShape(output()), getTensorData<int16_t>(output())); + break; + case DataType::S32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()), + getTensorShape(output()), getTensorData<int32_t>(output())); + break; + case DataType::S64: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()), + getTensorShape(output()), getTensorData<int64_t>(output())); + break; + case DataType::FLOAT32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()), + getTensorShape(output()), getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h new file mode 100644 index 000000000..184f0cb83 --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/Fill.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FILL_H +#define LUCI_INTERPRETER_KERNELS_FILL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Fill : public Kernel +{ +public: + Fill(const Tensor *dims, const Tensor *value, Tensor *output); + + const Tensor *dims() const { return _inputs[0]; } + const Tensor *value() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void configureShape(); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FILL_H diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp new file mode 100644 index 000000000..cf56df507 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FillTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<T> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<float> value_data = {5}; + + int32_t zero_point = 0; + + if (DT == loco::DataType::S8) + zero_point = 1; + + Tensor dims = 
makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point, + value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, FillInt) +{ + // Run for int32_t input + runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get()); + // Run for int64_t input + runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get()); + // Run for int8_t input + runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get()); + // Run for int16_t input + runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get()); + + SUCCEED(); +} + +TEST_F(FillTest, FillFloat) +{ + Shape dims_shape{3}; + + std::vector<int64_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5}; + + std::vector<int32_t> ref_output_shape{2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, Invalid_Input_Shape_NEG) +{ + Shape dims_shape{1, 3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FillTest, Invalid_Value_Shape_NEG) +{ + Shape dims_shape{3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-interpreter/src/kernels/Floor.cpp new file mode 100644 index 000000000..e3c4246cc --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Floor.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/floor.h> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Floor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Floor::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Floor::evalFloat() const +{ + tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Floor.h b/compiler/luci-interpreter/src/kernels/Floor.h new file mode 100644 index 000000000..ca3ad5997 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Floor : public Kernel +{ +public: + Floor(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H diff --git a/compiler/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-interpreter/src/kernels/Floor.test.cpp new file mode 100644 index 000000000..30076fb54 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.test.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Floor.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorTest, SimpleFloat) +{ + std::initializer_list<int32_t> input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0.2, 8.6, 2.4, 4.3, // Row 1 + 3, 7.1, 10.5, -0.9, // Row 2 + }; + + std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1}; + std::vector<float> ref_output_data{ + 0, 8, 2, 4, // Row 1 + 3, 7, 10, -1, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Floor kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Floor kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp new file mode 100644 index 000000000..a7a10a336 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/FloorDiv.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output) + : Kernel({x, y}, {output}) +{ +} + +void FloorDiv::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void FloorDiv::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void FloorDiv::evalFloat() const +{ + auto FloorDivFunc = [](float x, float y) -> float { + return std::floor(static_cast<double>(x) / static_cast<double>(y)); + }; + + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + + // Check the denominator + for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i) + { + LUCI_INTERPRETER_CHECK(y_data[i] != 0); + } + + if (x()->shape() != y()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(x()), x_data,
getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } + else + { + tflite::reference_ops::BinaryFunction<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-interpreter/src/kernels/FloorDiv.h new file mode 100644 index 000000000..e9c47d81a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class FloorDiv : public Kernel +{ +public: + FloorDiv(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp new file mode 100644 index 000000000..3e1b5f18e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/FloorDiv.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorDivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorDivTest, FloatSimple) +{ + Shape x_shape{2, 3}; + std::vector<float> x_data{ + 0.5, 2.4, 3.1, // Row 1 + 1.9, -1.9, -2.8, // Row 2 + }; + + Shape y_shape = x_shape; + std::vector<float> y_data{ + 2.0, 0.5, 3.0, // Row 1 + 1.0, -1.0, -2.0, // Row 2 + }; + + std::vector<int32_t> ref_output_shape{2, 3}; + std::vector<float> ref_output_data{ + 0, 4, 1, // Row 1 + 1, 1, 1, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorDivTest, FloatBroadcast) +{ + Shape x_shape{1, 3}; + std::vector<float> x_data{ + 0.5, 2.4, -3.1, // Row 1 + }; + + Shape y_shape{3, 3}; + std::vector<float> y_data{ + 1.0, 1.0, 1.0, // Row 1 + 2.0, -0.5, -2.0, // Row 2 + 0.3, 0.7, 0.9, // Row 3 + }; + + std::vector<int32_t> ref_output_shape{3, 3}; + std::vector<float> ref_output_data{ + 0, 2, -4, // Row 1 + 0, -5, 1, // Row 2 + 1, 3, -4, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = 
makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorDivTest, DivByZero_NEG) +{ + Shape shape{3}; + std::vector<float> x_data{1, 0, -1}; + std::vector<float> y_data{0, 0, 0}; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorDivTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.cpp 
b/compiler/luci-interpreter/src/kernels/FloorMod.cpp new file mode 100644 index 000000000..a64fcad3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorMod.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/FloorMod.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <cmath> + +namespace +{ + +template <typename T> T FloorDivFunc(T input1, T input2) +{ + struct FloatMod + { + float operator()(const float lhs, const float rhs) const { return std::fmod(lhs, rhs); } + }; + using ModFunc = + typename std::conditional<std::is_integral<T>::value, std::modulus<T>, FloatMod>::type; + ModFunc mod_func; + T trunc_mod = mod_func(input1, input2); + return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0)) ? 
(trunc_mod + input2) : trunc_mod; +} + +} // namespace + +namespace luci_interpreter +{ + +namespace kernels +{ + +FloorMod::FloorMod(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void FloorMod::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void FloorMod::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S8: + evalInteger<int8_t>(); + break; + case DataType::S16: + evalInteger<int16_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void FloorMod::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + + if (x()->shape() != y()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc<float>); + } + else + { + tflite::reference_ops::BinaryFunction<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc<float>); + } +} + +template <typename T> void FloorMod::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + + // Check the denominator + const auto y_data_type = y()->element_type(); + if (y_data_type == DataType::S8 || y_data_type == DataType::S16 || y_data_type == DataType::S32 || + y_data_type == DataType::S64) + { + for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i) + { + LUCI_INTERPRETER_CHECK(y_data[i] != 0); + } + } + 
+ if (x()->shape() != y()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<T>(output()), FloorDivFunc<T>); + } + else + { + tflite::reference_ops::BinaryFunction<T, T, T>(getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), + getTensorData<T>(output()), FloorDivFunc<T>); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.h b/compiler/luci-interpreter/src/kernels/FloorMod.h new file mode 100644 index 000000000..f2d9b2ae8 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorMod.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class FloorMod : public Kernel +{ +public: + FloorMod(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_MOD_H diff --git a/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp b/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp new file mode 100644 index 000000000..123a91e3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorMod.test.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/FloorMod.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorModTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorModTest, Simple) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int32_t> input1_data{10, 9, 11, 3}; + + Shape input2_shape = input1_shape; + std::vector<int32_t> input2_data{2, 2, 3, 4}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int32_t> ref_output_data{0, 1, 2, 3}; + + Tensor input1_tensor = + makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, NegativeValue) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int32_t> input1_data{10, -9, -11, 7}; + + Shape input2_shape = input1_shape; + std::vector<int32_t> input2_data{2, 2, -3, -4}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int32_t> ref_output_data{0, 1, -2, -1}; + + Tensor input1_tensor = + makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + FloorMod 
kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, BroadcastFloorMod) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int32_t> input1_data{ + 10, + -9, + -11, + 7, + }; + + Shape input2_shape{1}; + std::vector<int32_t> input2_data{-3}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int32_t> ref_output_data{-2, 0, -2, -2}; + + Tensor input1_tensor = + makeInputTensor<DataType::S32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, Int64WithBroadcast) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int64_t> input1_data{10, -9, -11, (1LL << 34) + 9}; + + Shape input2_shape{1}; + std::vector<int64_t> input2_data{-(1LL << 33)}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int64_t> ref_output_data{-8589934582, -9, -11, -8589934583}; + + Tensor input1_tensor = + makeInputTensor<DataType::S64>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S64>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + FloorMod kernel(&input1_tensor, 
&input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int64_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, FloatSimple) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<float> input1_data{10.0, 9.0, 11.0, 3.0}; + + Shape input2_shape = input1_shape; + std::vector<float> input2_data{2.0, 2.0, 3.0, 4.0}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> ref_output_data{0.0, 1.0, 2.0, 3.0}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, FloatNegativeValue) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<float> input1_data{10.0, -9.0, -11.0, 7.0}; + + Shape input2_shape = input1_shape; + std::vector<float> input2_data{2.0, 2.0, -3.0, -4.0}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> ref_output_data{0.0, 1.0, -2.0, -1.0}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorMod 
kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, FloatBroadcast) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<float> input1_data{ + 10.0, + -9.0, + -11.0, + 7.0, + }; + + Shape input2_shape{1}; + std::vector<float> input2_data{-3.0}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> ref_output_data{-2.0, 0.0, -2.0, -2.0}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, SimpleInt16) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int16_t> input1_data{10, 9, 11, 3}; + + Shape input2_shape = input1_shape; + std::vector<int16_t> input2_data{2, 2, 3, 4}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int16_t> ref_output_data{0, 1, 2, 3}; + + Tensor input1_tensor = + makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + FloorMod kernel(&input1_tensor, &input2_tensor, 
&output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, NegativeValueInt16) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int16_t> input1_data{110, -9, -11, 7}; + + Shape input2_shape = input1_shape; + std::vector<int16_t> input2_data{2, 2, -3, -4}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int16_t> ref_output_data{0, 1, -2, -1}; + + Tensor input1_tensor = + makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, BroadcastFloorModInt16) +{ + Shape input1_shape{1, 2, 2, 1}; + std::vector<int16_t> input1_data{10, -9, -11, 7}; + + Shape input2_shape{1}; + std::vector<int16_t> input2_data{-3}; + + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<int16_t> ref_output_data{-2, 0, -2, -2}; + + Tensor input1_tensor = + makeInputTensor<DataType::S16>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S16>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorModTest, DivByZero_NEG) +{ + Shape shape{3}; + std::vector<int32_t> input1_data{1, 0, -1}; + std::vector<int32_t> input2_data{0, 0, 0}; + + Tensor input1_tensor = makeInputTensor<DataType::S32>(shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>(shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(FloorModTest, Int64DivByZero_NEG) +{ + Shape shape{3}; + std::vector<int64_t> input1_data{1, 0, -1}; + std::vector<int64_t> input2_data{0, 0, 0}; + + Tensor input1_tensor = makeInputTensor<DataType::S64>(shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>(shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(FloorModTest, Int16DivByZero_NEG) +{ + Shape shape{3}; + std::vector<int16_t> input1_data{1, 0, -1}; + std::vector<int16_t> input2_data{0, 0, 0}; + + Tensor input1_tensor = makeInputTensor<DataType::S16>(shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(FloorModTest, Input_Output_Type_Mismatch_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, Input_Type_Mismatch_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, Float_Broadcast_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, Int64_Broadcast_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, Int32_Broadcast_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + 
Tensor output_tensor = makeOutputTensor(DataType::S32); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, Int16_Broadcast_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({2}, {1, 2}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorModTest, UnsupportedType_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + FloorMod kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + + _memory_manager->allocate_memory(output_tensor); + ASSERT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp index 6529c5e77..bd2bb2f35 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include "PALFullyConnected.h" #include <stdexcept> @@ -30,33 +30,80 @@ namespace kernels FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output, const FullyConnectedParams ¶ms) - : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params) + : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params) { } void FullyConnected::configure() { - if (weights()->element_type() 
!= DataType::FLOAT32) + if (weights()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32); + } + else if (weights()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32); + } + else if (weights()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32); + } + else + { throw std::runtime_error("Unsupported type."); - - assert(input()->element_type() == DataType::FLOAT32); - assert(weights()->element_type() == DataType::FLOAT32); - assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + } const Shape &input_shape = input()->shape(); const Shape &weights_shape = weights()->shape(); - assert(weights_shape.num_dims() == 2); - assert(bias() == nullptr || bias()->shape().num_elements() == weights_shape.dim(0)); + LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(bias() == nullptr || + bias()->shape().num_elements() == weights_shape.dim(0)); - assert(input_shape.num_elements() % weights_shape.dim(1) == 0); + LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0); const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1); const int32_t num_units = weights_shape.dim(0); - output()->resize({batch_size, num_units}); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0)); + + if (params().keep_num_dims == false) + { + output()->resize({batch_size, 
num_units}); + } + else + { + luci_interpreter::Shape output_shape(input_shape.num_dims()); + for (int i = 0; i < input_shape.num_dims(); ++i) + output_shape.dim(i) = input_shape.dim(i); + output_shape.dim(input_shape.num_dims() - 1) = num_units; + output()->resize(output_shape); + } } -void FullyConnected::execute() const { evalFloat(); } +void FullyConnected::execute() const +{ + switch (input()->element_type()) + { + case DataType::U8: + evalQuantized(); + break; + case DataType::S8: + evalQuantizedS8(); + break; + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} void FullyConnected::evalFloat() const { @@ -70,9 +117,75 @@ void FullyConnected::evalFloat() const params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault; tflite::reference_ops::FullyConnected( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()), - getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output())); + params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()), + getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void FullyConnected::evalQuantized() const +{ + double real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + 
tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + tflite::reference_ops::FullyConnected( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()), + getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void FullyConnected::evalQuantizedS8() const +{ + double real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + luci_interpreter_pal::FullyConnected<int8_t>( + op_params, getTensorShape(input()), getTensorData<int8_t>(input()), 
getTensorShape(weights()), + getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<int8_t>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h index 2e3174c74..2a7c068c0 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.h +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h @@ -41,6 +41,8 @@ public: private: void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS8() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp index 8077fcb5c..4474cc4fb 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp @@ -16,6 +16,7 @@ #include "kernels/FullyConnected.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,40 +27,232 @@ namespace using namespace testing; -TEST(FullyConnectedTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, 
memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<int8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> bias_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const float quantized_tolerance = getTolerance(-127, 128, 255); + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second, + weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0, + bias_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, 
&output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <> +void Check<uint8_t>( + std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const float quantized_tolerance = getTolerance(-127, 128, 255); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second, + weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0, + bias_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + 
EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <typename T> class FullyConnectedTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; +TYPED_TEST_SUITE(FullyConnectedTest, DataTypes); + +TYPED_TEST(FullyConnectedTest, Simple) +{ + Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3}, + { + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 + }, + { + -3, -7, 4, -4, -6, 4, // unit = 0 + 3, 5, 2, 3, -3, -8, // unit = 1 + -3, 7, 4, 9, 0, -5, // unit = 2 + }, + {-1, -5, -8}, + { + 0, 0, 32, // batch = 0 + 22, 11, 47, // batch = 1 + }); +} + +TEST(FullyConnectedTest, InvalidBiasType_NEG) { Shape input_shape{3, 2, 2, 1}; std::vector<float> input_data{ - -3, -5, 5, 4, 9, -2, // batch = 0 - -3, -2, -4, 9, -8, 1, // batch = 1 + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 }; Shape weights_shape{3, 6}; std::vector<float> weights_data{ - -3, -7, 4, -4, -6, 4, // unit = 0 - 3, 5, 2, 3, -3, -8, // unit = 1 - -3, 7, 4, 9, 0, -5, // unit = 2 + -3, -7, 4, -4, -6, 4, // unit = 0 + 3, 5, 2, 3, -3, -8, // unit = 1 + -3, 7, 4, 9, 0, -5, // unit = 2 + }; + Shape bias_shape{3}; + std::vector<int32_t> bias_data{-1, -5, -8}; + + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + 
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG) +{ + Shape input_shape{3, 2, 2, 1}; + std::vector<float> input_data{ + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 + }; + Shape weights_shape{1, 3, 6}; + std::vector<float> weights_data{ + -3, -7, 4, -4, -6, 4, // unit = 0 + 3, 5, 2, 3, -3, -8, // unit = 1 + -3, 7, 4, 9, 0, -5, // unit = 2 }; Shape bias_shape{3}; std::vector<float> bias_data{-1, -5, -8}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FullyConnectedParams params{}; params.activation = Activation::RELU; FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); - kernel.configure(); - kernel.execute(); + EXPECT_ANY_THROW(kernel.configure()); +} - std::vector<float> ref_output_data{ - 0, 0, 32, // batch = 0 - 22, 11, 47, // batch = 1 +TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG) +{ + Shape input_shape{3, 2, 2, 1}; + std::vector<float> input_data{ + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + Shape weights_shape{6, 3}; + std::vector<float> weights_data{ + -3, -7, 4, // unit = 0 + -4, -6, 4, // unit = 1 + 3, 5, 2, // unit = 2 + 3, -3, -8, // unit = 3 + -3, 
7, 4, // unit = 4 + 9, 0, -5, // unit = 5 + }; + Shape bias_shape{3}; + std::vector<float> bias_data{-1, -5, -8}; + + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp new file mode 100644 index 000000000..f1256660f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2021 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Gather.h" +#include "kernels/Utils.h" +#include "PALGather.h" + +#include <stdexcept> +#include <cassert> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output, + const GatherParams &gparams) + : KernelWithParams<GatherParams>({params, indices}, {output}, gparams) +{ +} + +void Gather::configure() +{ + if (params()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + } + else + { + throw std::runtime_error("Unsupported type."); + } + + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 || + indices()->element_type() == DataType::S64); + + // refer tensorflow/lite/kernels/gather.cc + + const Shape ¶ms_shape = params()->shape(); + const Shape &indices_shape = indices()->shape(); + + int axis = _params.axis; + if (axis < 0) + { + axis += params_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims()); + + int batch_dims = _params.batch_dims; + // batch_dims should be in range: [-rank(indices), rank(indices)]. + // Negative batch_dims is added with rank of positions. 
+ if (batch_dims < 0) + { + batch_dims += indices_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(batch_dims <= axis); + LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims()); + LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims()); + for (int i = 0; i < batch_dims; ++i) + { + LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i)); + } + + const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims; + + Shape output_shape(num_dimensions); + int output_index = 0; + for (int i = 0; i < axis; ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + for (int i = batch_dims; i < indices_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = indices_shape.dim(i); + } + for (int i = axis + 1; i < params_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + output()->resize(output_shape); +} + +void Gather::execute() const +{ + switch (params()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Gather::evalFloat() const +{ + assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64); + + const auto params_data = getTensorData<float>(params()); + auto output_data = getTensorData<float>(output()); + + tflite::GatherParams tparams; + tparams.axis = _params.axis; + tparams.batch_dims = _params.batch_dims; + + if (indices()->element_type() == DataType::S32) + { + const auto indices_data = getTensorData<int32_t>(indices()); + + luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), indices_data, + getTensorShape(output()), output_data); + } + else + { + const auto indices_data = getTensorData<int64_t>(indices()); + + luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), 
indices_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h new file mode 100644 index 000000000..cc02d64fb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H +#define LUCI_INTERPRETER_KERNELS_GATHER_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Gather : public KernelWithParams<GatherParams> +{ +public: + Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams); + + const Tensor *params() const { return _inputs[0]; } + const Tensor *indices() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GATHER_H diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp new file mode 100644 index 000000000..4b3dda708 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gather.test.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Gather.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GatherTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GatherTest, Simple) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4})); +} + +TEST_F(GatherTest, Simple_Batch) +{ + Shape params_shape = {3, 5}; + Shape indices_shape = {3, 2}; + std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.}; + std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3}; + std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get()); + Tensor indices_tensor = + makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 1; 
+ + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2})); +} + +TEST_F(GatherTest, Simple_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Axis_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 100; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Batch_NEG) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 0; + gparams.batch_dims = 1; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter 
diff --git a/compiler/luci-interpreter/src/kernels/Gelu.cpp b/compiler/luci-interpreter/src/kernels/Gelu.cpp new file mode 100644 index 000000000..44e018e0e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gelu.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Gelu.h" + +#include "kernels/Utils.h" + +#include "PALGelu.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Gelu::Gelu(const Tensor *input, Tensor *output, const GeluParams ¶ms) + : KernelWithParams<GeluParams>({input}, {output}, params) +{ +} + +void Gelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void Gelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Gelu::evalFloat() const +{ + luci_interpreter_pal::Gelu(params().approximate, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Gelu.h b/compiler/luci-interpreter/src/kernels/Gelu.h new file mode 100644 index 000000000..c7c8bd971 --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/Gelu.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_GELU_H +#define LUCI_INTERPRETER_KERNELS_GELU_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Gelu : public KernelWithParams<GeluParams> +{ +public: + Gelu(const Tensor *input, Tensor *output, const GeluParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + +private: + bool _approximate = false; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GELU_H diff --git a/compiler/luci-interpreter/src/kernels/Gelu.test.cpp b/compiler/luci-interpreter/src/kernels/Gelu.test.cpp new file mode 100644 index 000000000..64428098e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Gelu.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Gelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data, + bool approximate) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<float>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + GeluParams params{}; + params.approximate = approximate; + + Gelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); +} + +class GeluTest : public ::testing::Test +{ +}; + +TEST_F(GeluTest, Simple) +{ + Check(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 0.841345f, 2.99595f, // Row 1 + 0.841345f, -0.158655f, -0.0455003f, // Row 2 + }, + /*approximate=*/false); + + SUCCEED(); +} + +TEST_F(GeluTest, Approximate) +{ + Check(/*input_shape=*/{2, 
3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 0.841192f, 2.99636f, // Row 1 + 0.841192f, -0.158808f, -0.0454023f, // Row 2 + }, + /*approximate=*/true); + + SUCCEED(); +} + +TEST_F(GeluTest, DifferentInOutType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + GeluParams params{}; + params.approximate = false; + + Gelu kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp new file mode 100644 index 000000000..5ccae3c38 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Greater.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Greater::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Greater::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Greater::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void Greater::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + 
tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Greater::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h new file mode 100644 index 000000000..065f76d7b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H +#define LUCI_INTERPRETER_KERNELS_GREATER_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Greater : public Kernel +{ +public: + Greater(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_H diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp new file mode 100644 index 000000000..a48080124 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Greater.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GreaterTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, // Row 1 + true, false, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(GreaterTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + 
false, false, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> 
x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, false, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(GreaterTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, false, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + 
+TEST_F(GreaterTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp new file mode 100644 index 000000000..27e42c971 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/GreaterEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output) + : Kernel({x, y}, {output}) +{ +} + +void GreaterEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void GreaterEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void GreaterEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + 
op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void GreaterEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +void GreaterEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + 
tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h new file mode 100644 index 000000000..e333c30a6 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class GreaterEqual : public Kernel +{ +public: + GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp new file mode 100644 index 000000000..35bf88eab --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/GreaterEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GreaterEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(GreaterEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, 
&output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value - 1, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, true, true, // Row 4 + }; + + 
Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(GreaterEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, 
y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, true, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.cpp b/compiler/luci-interpreter/src/kernels/HardSwish.cpp new file mode 100644 index 000000000..b1008459a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/HardSwish.cpp @@ -0,0 +1,52 @@ +/* + * Copyright 
(c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/HardSwish.h" +#include "kernels/Utils.h" + +#include "PALHardSwish.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +HardSwish::HardSwish(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void HardSwish::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void HardSwish::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::HardSwish(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.h b/compiler/luci-interpreter/src/kernels/HardSwish.h new file mode 100644 index 000000000..bb9e9b653 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/HardSwish.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_HARDSWISH_H +#define LUCI_INTERPRETER_KERNELS_HARDSWISH_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class HardSwish : public Kernel +{ +public: + HardSwish(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_HARDSWISH_H diff --git a/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp b/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp new file mode 100644 index 000000000..c055fee0e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/HardSwish.test.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/HardSwish.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + HardSwish kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + (void)output_shape; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); +} + +TEST(HardSwishTest, SimpleHardSwish) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }, + /*output_data=*/ + { + 0, -0, 1.66667, -0, // + 3, -0.333333, 10, -0.0483333, // + }); +} + +TEST(HardSwishTest, InOutTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + HardSwish kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp index e6bdee338..971708bca 100644 --- a/compiler/luci-interpreter/src/kernels/If.cpp +++ 
b/compiler/luci-interpreter/src/kernels/If.cpp @@ -15,6 +15,7 @@ */ #include "kernels/If.h" +#include "kernels/Utils.h" #include <cstring> @@ -33,21 +34,21 @@ static std::vector<const Tensor *> joinInputs(const Tensor *cond, If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs, RuntimeGraph *then_graph, RuntimeGraph *else_graph) - : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph), - _else_graph(else_graph) + : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph), + _else_graph(else_graph) { } void If::configure() { - assert(cond()->element_type() == DataType::BOOL); - assert(cond()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL); + LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1); for (RuntimeGraph *graph : {_then_graph, _else_graph}) { (void)graph; - assert(graph->getInputTensors().size() == getInputTensors().size() - 1); - assert(graph->getOutputTensors().size() == getOutputTensors().size()); + LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1); + LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size()); } } @@ -62,11 +63,13 @@ void If::execute() const // Copy kernel inputs to active graph inputs. 
for (size_t i = 0; i < getInputTensors().size() - 1; ++i) { - assert(graph_inputs[i]->element_type() == input(i)->element_type()); + LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type()); graph_inputs[i]->resize(input(i)->shape()); const int32_t num_elements = input(i)->shape().num_elements(); const std::size_t element_size = getDataTypeSize(input(i)->element_type()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(graph_inputs[i]); std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size); } @@ -75,8 +78,10 @@ void If::execute() const // Copy graph outputs to kernel outputs. for (size_t i = 0; i < getOutputTensors().size(); ++i) { - assert(graph_outputs[i]->element_type() == output(i)->element_type()); + LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type()); output(i)->resize(graph_outputs[i]->shape()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(output(i)); const int32_t num_elements = output(i)->shape().num_elements(); const std::size_t element_size = getDataTypeSize(output(i)->element_type()); diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp index 9b3857ce3..c5f4faf75 100644 --- a/compiler/luci-interpreter/src/kernels/If.test.cpp +++ b/compiler/luci-interpreter/src/kernels/If.test.cpp @@ -21,6 +21,8 @@ #include "kernels/Mul.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + namespace luci_interpreter { namespace kernels @@ -30,15 +32,27 @@ namespace using namespace testing; -RuntimeGraph *buildAddSubgraph(RuntimeModule *module) +class IfTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +RuntimeGraph 
*buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input1 = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *input2 = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *output = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); graph->setInputTensors({input1, input2}); graph->setOutputTensors({output}); @@ -50,15 +64,19 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module) return graph; } -RuntimeGraph *buildMulSubgraph(RuntimeModule *module) +RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input1 = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *input2 = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *output = graph->addTensor( - std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + 
memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); graph->setInputTensors({input1, input2}); graph->setOutputTensors({output}); @@ -70,40 +88,72 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module) return graph; } -TEST(IfTest, CondTrue) +TEST_F(IfTest, CondTrue) { - Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); kernel.configure(); + _memory_manager->allocate_memory(output); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({6, 9}))); + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9})); } -TEST(IfTest, CondFalse) +TEST_F(IfTest, CondFalse) { - Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = 
makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); kernel.configure(); + _memory_manager->allocate_memory(output); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({5, 14}))); + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14})); +} + +TEST_F(IfTest, InvalidCondType_NEG) +{ + Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(IfTest, InvalidCondElementNum_NEG) +{ + Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + 
EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp new file mode 100644 index 000000000..22a329be6 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/InstanceNorm.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/common.h> +#include <cmath> + +namespace luci_interpreter +{ +namespace kernels +{ + +InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, + Tensor *output, const InstanceNormParams ¶ms) + : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params) +{ +} + +void InstanceNorm::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) || + gamma()->shape().dim(0) == 1); + LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1); + 
LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) || + beta()->shape().dim(0) == 1); + output()->resize(input()->shape()); +} + +void InstanceNorm::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void InstanceNorm::evalFloat() const +{ + float activation_min, activation_max; + calculateActivationRange(params().activation, &activation_min, &activation_max); + auto input_shape = getTensorShape(input()); + auto output_shape = getTensorShape(output()); + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1); + const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2); + const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3); + const float *input_data = getTensorData<float>(input()); + const float *gamma_data = getTensorData<float>(gamma()); + auto gamma_shape = getTensorShape(gamma()); + bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1; + const float *beta_data = getTensorData<float>(beta()); + auto beta_shape = getTensorShape(beta()); + bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1; + float *output_data = getTensorData<float>(output()); + for (int32_t batch = 0; batch < batches; batch++) + { + for (int32_t channel = 0; channel < channels; channel++) + { + double sum = 0.0f; + double square_sum = 0.0f; + int32_t size = heights * widths; + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)]; + sum += input_val; + square_sum += (input_val * input_val); + } + } + double mean = sum / size; + double var = square_sum / size - mean * mean; + + double gamma = 
single_gamma ? gamma_data[0] : gamma_data[channel]; + double beta = single_beta ? beta_data[0] : beta_data[channel]; + double a = gamma / (std::sqrt(var + params().epsilon)); + double b = -mean * a + beta; + + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_value = + input_data[tflite::Offset(output_shape, batch, height, width, channel)]; + double output_value = input_value * a + b; + output_data[tflite::Offset(output_shape, batch, height, width, channel)] = + tflite::ActivationFunctionWithMinMax((float)output_value, activation_min, + activation_max); + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-interpreter/src/kernels/InstanceNorm.h new file mode 100644 index 000000000..a70a84e0a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H +#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class InstanceNorm : public KernelWithParams<InstanceNormParams> +{ +public: + InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output, + const InstanceNormParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *gamma() const { return _inputs[1]; } + const Tensor *beta() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp new file mode 100644 index 000000000..04400c3c0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernels/InstanceNorm.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class InstanceNormTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(InstanceNormTest, Simple) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); +} + +TEST_F(InstanceNormTest, Single_gamma_beta) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2})); +} + +TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp index cfa535075..64222953f 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp @@ -17,7 +17,7 @@ #include "kernels/L2Normalize.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALL2Normalize.h" #include <stdexcept> @@ -28,21 +28,22 @@ namespace kernels { L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams ¶ms) - : KernelWithParams<L2NormParams>({input}, {output}, params) + : KernelWithParams<L2NormParams>({input}, {output}, params) { } void L2Normalize::configure() { - assert(input()->shape().num_dims() <= 4); - assert(output()->element_type() == DataType::FLOAT32 || output()->element_type() == DataType::U8); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4); + LUCI_INTERPRETER_CHECK(output()->element_type() 
== DataType::FLOAT32 || + output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (output()->element_type() == DataType::U8) { - assert(output()->scale() == (1. / 128.)); - assert(output()->zero_point() == 128); + LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.)); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 128); } - assert(params().activation == Activation::NONE); + LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE); output()->resize(input()->shape()); } @@ -65,9 +66,9 @@ template <typename T> void L2Normalize::eval(int32_t zero_point) const { tflite::L2NormalizationParams op_params{}; op_params.input_zero_point = zero_point; - tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()), - getTensorData<T>(input()), getTensorShape(output()), - getTensorData<T>(output())); + luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()), + getTensorData<T>(input()), getTensorShape(output()), + getTensorData<T>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp index f53eaca94..6f960e8b4 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -16,6 +16,7 @@ */ #include "kernels/L2Normalize.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,11 +27,13 @@ namespace using namespace testing; -TEST(L2NormalizeTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; - - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data); + 
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); L2NormParams params{}; @@ -38,16 +41,85 @@ TEST(L2NormalizeTest, Float) L2Normalize kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? std::max(input_data) : 0.f); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 128., 128); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - std::vector<float> ref_output_data{-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class L2NormalizeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(L2NormalizeTest, DataTypes); + +TYPED_TEST(L2NormalizeTest, Simple) +{ + Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, + {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}); } -// TODO Uint8Quantized -// Implement GetDequantizedOutput Function. -// Create Test for Uint8 Case +TEST(L2NormalizeTest, ActivationType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::RELU6; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. 
/ 64., 127, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp index 37a6ddedc..5a88808d5 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALL2Pool2D.h" #include <stdexcept> @@ -30,14 +30,14 @@ namespace kernels { L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) - : KernelWithParams<Pool2DParams>({input}, {output}, params) + : KernelWithParams<Pool2DParams>({input}, {output}, params) { } void L2Pool2D::configure() { - assert(input()->shape().num_dims() == 4); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); int batches = input()->shape().dim(0); int height = input()->shape().dim(1); @@ -49,13 +49,13 @@ void L2Pool2D::configure() int out_width, out_height; out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1); out_height = - computeOutputSize(padding, height, params().filter_height, params().stride_height, 1); + computeOutputSize(padding, height, params().filter_height, params().stride_height, 1); _padding_width = - computePadding(params().stride_width, 1, width, params().filter_width, out_width); + computePadding(params().stride_width, 1, width, params().filter_width, out_width); _padding_height = - computePadding(params().stride_height, 1, height, params().filter_height, 
out_height); + computePadding(params().stride_height, 1, height, params().filter_height, out_height); - assert(input()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); output()->resize({batches, out_height, out_width, channels_out}); } @@ -75,9 +75,9 @@ void L2Pool2D::execute() const op_params.padding_values.width = _padding_width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp index 06bb9388f..7245456cb 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -17,6 +17,7 @@ #include "kernels/L2Pool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,14 +28,23 @@ namespace using namespace testing; -TEST(L2Pool2DTest, FloatNone) +class L2Pool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(L2Pool2DTest, FloatNone) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - 0, 6, 2, 4, // - 3, 2, 10, 7, // + 0, 6, 2, 4, // + 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -47,22 +57,23 @@ TEST(L2Pool2DTest, FloatNone) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatRelu) +TEST_F(L2Pool2DTest, FloatRelu) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - -1, -6, 2, 4, // - -3, -2, 10, 7, // + -1, -6, 2, 4, // + -3, -2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -75,22 +86,23 @@ TEST(L2Pool2DTest, FloatRelu) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.53553, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatRelu1) +TEST_F(L2Pool2DTest, FloatRelu1) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - -0.1, -0.6, 2, 4, // - -0.3, -0.2, 10, 7, // + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -103,22 +115,23 @@ TEST(L2Pool2DTest, FloatRelu1) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.353553, 1.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatRelu6) +TEST_F(L2Pool2DTest, FloatRelu6) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - -0.1, -0.6, 2, 4, // - -0.3, -0.2, 10, 7, // + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -131,22 +144,23 @@ TEST(L2Pool2DTest, FloatRelu6) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.353553, 6.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatPaddingSame) +TEST_F(L2Pool2DTest, FloatPaddingSame) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - 0, 6, 2, 4, // - 3, 2, 10, 7, // + 0, 6, 2, 4, // + 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -159,22 +173,23 @@ TEST(L2Pool2DTest, FloatPaddingSame) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatPaddingSameSlide1) +TEST_F(L2Pool2DTest, FloatPaddingSameStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - 0, 6, 2, 4, // - 3, 2, 10, 7, // + 0, 6, 2, 4, // + 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -187,22 +202,24 @@ TEST(L2Pool2DTest, FloatPaddingSameSlide1) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05 + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f)); // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatPaddingValidSlide1) +TEST_F(L2Pool2DTest, FloatPaddingValidStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - 0, 6, 2, 4, // - 3, 2, 10, 7, // + 0, 6, 2, 4, // + 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -215,14 +232,60 @@ TEST(L2Pool2DTest, FloatPaddingValidSlide1) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} +TEST_F(L2Pool2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp index 1a26debe0..3833a55e8 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -18,8 +18,9 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +#include "PALLeakyRelu.h" #include <stdexcept> @@ -30,13 +31,13 @@ namespace kernels { LeakyRelu::LeakyRelu(const Tensor 
*input, Tensor *output, const LeakyReluParams ¶ms) - : KernelWithParams<LeakyReluParams>({input}, {output}, params) + : KernelWithParams<LeakyReluParams>({input}, {output}, params) { } void LeakyRelu::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (input()->element_type() == DataType::U8) { double alpha_multiplier = input()->scale() * params().alpha / output()->scale(); @@ -66,9 +67,8 @@ void LeakyRelu::evalFloat() const { tflite::LeakyReluParams op_params{}; op_params.alpha = params().alpha; - tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); } void LeakyRelu::evalQuantized() const @@ -82,8 +82,8 @@ void LeakyRelu::evalQuantized() const op_params.output_shift_identity = _output_shift_identity; tflite::reference_ops::QuantizeLeakyRelu( - op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp index c79d3d6bc..0f6263b57 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -16,6 +16,7 @@ #include "kernels/LeakyRelu.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -28,12 +29,13 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, 
std::initializer_list<int32_t> output_shape, - std::initializer_list<T> input_data, std::initializer_list<T> output_data, float alpha, - DataType element_type) + std::initializer_list<float> input_data, std::initializer_list<float> output_data, + float alpha) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); - + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); LeakyReluParams params{}; @@ -42,32 +44,83 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int LeakyRelu kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - (void)output_shape; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); } -TEST(LeakReluTest, FloatSimple) +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data, float alpha) { - Check<float>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, /*input_data=*/ - { - 0.0f, 1.0f, 3.0f, // Row 1 - 1.0f, -1.0f, -2.0f, // Row 2 - }, - /*output_data=*/ - { - 0.0f, 1.0f, 3.0f, // Row 1 - 1.0f, -0.5f, -1.0f, // Row 2 - }, - /*alpha=*/0.5f, getElementType<float>()); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f); + Tensor 
input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <typename T> class LeakReluTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(LeakReluTest, DataTypes); + +TYPED_TEST(LeakReluTest, Simple) +{ + Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -1.0f, // Row 2 + }, + /*alpha=*/0.5f); SUCCEED(); } -// TODO Uint8Simple -// Implement GetDequantizedOutput Function. 
-// Create Test for Uint8 Case +TEST(LeakReluTest, InvalidInputOutputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LeakyReluParams params{}; + params.alpha = 0.5f; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp new file mode 100644 index 000000000..8d26ff297 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Less.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Less::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Less::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Less::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +template <typename T> void Less::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + 
op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Less::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h new file mode 100644 index 000000000..e27bb689c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_H +#define LUCI_INTERPRETER_KERNELS_LESS_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Less : public Kernel +{ +public: + Less(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_H diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp new file mode 100644 index 000000000..8c5963363 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Less.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessTest, FloatBroadcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, 
// Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // 
Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, false, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, 
Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp new file mode 100644 index 000000000..b474bc47a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LessEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void LessEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void LessEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LessEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() 
!= y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void LessEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void LessEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), 
x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h new file mode 100644 index 000000000..f82ea90d4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LessEqual : public Kernel +{ +public: + LessEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp new file mode 100644 index 000000000..b2e2fa7a1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LessEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + 
kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, true, false, // Row 4 + }; + + Tensor x_tensor = 
makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + 
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, true, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, 
quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = 
makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp index 08efa1d6a..a2bf442b0 100644 --- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALLocalResponseNormalization.h" #include <stdexcept> @@ -29,16 +29,16 @@ namespace kernels { LocalResponseNormalization::LocalResponseNormalization( - const Tensor *input, Tensor *output, const LocalResponseNormalizationParams ¶ms) - : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params) + const Tensor *input, Tensor *output, const LocalResponseNormalizationParams ¶ms) + : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params) { } void LocalResponseNormalization::configure() { - assert(input()->shape().num_dims() == 4); - assert(output()->element_type() == DataType::FLOAT32); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + 
LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); output()->resize(input()->shape()); } @@ -52,9 +52,9 @@ void LocalResponseNormalization::execute() const op_params.bias = params().bias; op_params.alpha = params().alpha; op_params.beta = params().beta; - tflite::optimized_ops::LocalResponseNormalization( - op_params, getTensorShape(input()), getTensorData<float>(input()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::LocalResponseNormalization( + op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp index 4191bdb29..4a9d4739f 100644 --- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LocalResponseNormalization.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,10 +28,18 @@ namespace using namespace testing; -TEST(LocalResponseNormalizationTest, SameAsL2Norm) +class LocalResponseNormalizationTest : public ::testing::Test { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LocalResponseNormalizationTest, SameAsL2Norm) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -41,16 +50,17 @@ 
TEST(LocalResponseNormalizationTest, SameAsL2Norm) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}))); + FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); } -TEST(LocalResponseNormalizationTest, WithAlpha) +TEST_F(LocalResponseNormalizationTest, WithAlpha) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -61,16 +71,17 @@ TEST(LocalResponseNormalizationTest, WithAlpha) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}))); + FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})); } -TEST(LocalResponseNormalizationTest, WithBias) +TEST_F(LocalResponseNormalizationTest, WithBias) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -81,16 +92,17 @@ TEST(LocalResponseNormalizationTest, WithBias) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}))); + FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})); } -TEST(LocalResponseNormalizationTest, SmallRadius) +TEST_F(LocalResponseNormalizationTest, SmallRadius) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -101,11 +113,43 @@ TEST(LocalResponseNormalizationTest, SmallRadius) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray( - ArrayFloatNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}))); + FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + 
params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Log.cpp b/compiler/luci-interpreter/src/kernels/Log.cpp new file mode 100644 index 000000000..fa5f90e66 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Log.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Log.h" +#include "kernels/Utils.h" + +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Log::Log(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Log::configure() { output()->resize(input()->shape()); } + +void Log::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Log::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = input()->shape(); + auto output_data = getTensorData<float>(output()); + for (int64_t i = 0; i < input_shape.num_elements(); ++i) + { + output_data[i] = std::log(input_data[i]); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Log.h b/compiler/luci-interpreter/src/kernels/Log.h new file mode 100644 index 000000000..49b293764 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Log.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOG_H +#define LUCI_INTERPRETER_KERNELS_LOG_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Log : public Kernel +{ +public: + Log(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOG_H diff --git a/compiler/luci-interpreter/src/kernels/Log.test.cpp b/compiler/luci-interpreter/src/kernels/Log.test.cpp new file mode 100644 index 000000000..3e360e098 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Log.test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Log.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogTest, FloatSimple) +{ + std::vector<float> input_data{1, 3.1415926, 1, 1}; + + std::vector<float> ref_output_data{0, 1.14473, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Log kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 4, 1})); +} + +TEST_F(LogTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Log kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp new file mode 100644 index 000000000..79c315338 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogSoftmax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/log_softmax.h> + +#include "PALLogSoftmax.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogSoftmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 255); + + tflite::SoftmaxParams params{}; + + params.table = _table; + params.beta = 1.0; + luci_interpreter_pal::PopulateSoftmaxLookupTable(¶ms, input()->scale(), params.beta); + } + output()->resize(input()->shape()); +} + +void LogSoftmax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LogSoftmax::evalFloat() const +{ + tflite::SoftmaxParams params{}; + tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void LogSoftmax::evalQuantized() const +{ + const auto input_shape = getTensorShape(input()); + const auto output_shape = getTensorShape(output()); + const auto input_scale = input()->scale(); + uint8_t *output_data = getTensorData<uint8_t>(output()); + const uint8_t 
*input_data = getTensorData<uint8_t>(input()); + const float beta = 1.0; + + tflite::SoftmaxParams params{}; + + params.table = const_cast<float *>(_table); + params.zero_point = output()->zero_point(); + params.scale = output()->scale(); + + luci_interpreter_pal::InitializeParams(¶ms, input_scale, beta); + luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-interpreter/src/kernels/LogSoftmax.h new file mode 100644 index 000000000..18477fbe3 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H +#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogSoftmax : public Kernel +{ +public: + LogSoftmax(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + + float _table[256]; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp new file mode 100644 index 000000000..50dcd5c28 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogSoftmax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogSoftmaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogSoftmaxTest, Float) +{ + Shape input_shape{2, 4}; + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(LogSoftmaxTest, Uint8) +{ + float kMin = -10; + float kMax = 10; + float kLogSoftmaxQuantizedTolerance = 16. / 256; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. 
/ 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111})); +} + +TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG) +{ + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 20. 
/ 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp new file mode 100644 index 000000000..8e7263231 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalAnd::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalAnd::execute() const +{ + switch (input1()->element_type()) + { + case DataType::BOOL: + evalLogicalAnd(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalAnd::evalLogicalAnd() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()), + getTensorShape(input2()), getTensorData<bool>(input2()), + getTensorShape(output()), getTensorData<bool>(output()), + [](bool x, bool y) { return x && y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-interpreter/src/kernels/LogicalAnd.h new file mode 100644 index 000000000..46b889986 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H +#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalAnd : public Kernel +{ +public: + LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalAnd() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp new file mode 100644 index 000000000..21b7951e0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalAndTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalAndTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor1 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, Broadcast) +{ + Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, 
_memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalAndTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.cpp new file mode 100644 index 000000000..65ab961aa --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalNot.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogicalNot::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void LogicalNot::execute() const +{ + switch (input()->element_type()) + { + case DataType::BOOL: + evalLogicalNot(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalNot::evalLogicalNot() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + bool *output_data = getTensorData<bool>(output()); + const bool *input_data = getTensorData<bool>(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = !input_data[i]; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-interpreter/src/kernels/LogicalNot.h new file mode 100644 index 000000000..1608fafa5 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalNot.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H +#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalNot : public Kernel +{ +public: + LogicalNot(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalNot() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp new file mode 100644 index 000000000..3cbf27f6b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalNotTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalNotTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(false, true, true, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalNotTest, OutputTypeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalNotTest, InputTypeInvalid_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp new file mode 100644 index 000000000..f289ca64f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp @@ -0,0 +1,49 @@ 
+/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" + +#include "kernels/Utils.h" +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalOr::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalOr::execute() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()), + getTensorShape(input2()), getTensorData<bool>(input2()), + getTensorShape(output()), getTensorData<bool>(output()), + [](bool x, bool y) { return x || y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-interpreter/src/kernels/LogicalOr.h new file mode 100644 index 000000000..88606483f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalOr.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H +#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalOr : public Kernel +{ +public: + LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp new file mode 100644 index 000000000..d65a69a5e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalOrTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalOrTest, Basic) +{ + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false}, + _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, true, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, Broadcast) +{ + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalOrTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp index c7d45615c..58e4f185d 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/logistic.h> namespace luci_interpreter { @@ -29,10 +29,10 @@ Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {outpu void Logistic::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (input()->element_type() == 
DataType::U8) { - assert(output()->scale() == 1. / 256); + LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256); populateLookupTable(); } output()->resize(input()->shape()); diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp index 00feddf3d..5a1ea669c 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Logistic.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,31 +27,121 @@ namespace using namespace testing; -TEST(LogisticTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Shape input_shape{1, 2, 4, 1}; - std::vector<float> input_data{ - 0, -6, 2, 4, // - 3, -2, 10, 1, // - }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(getElementType<T>()); Logistic kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - std::vector<float> ref_output_data{ - 0.5, 0.002473, 0.880797, 0.982014, // - 0.952574, 0.119203, 0.999955, 0.731059, // - }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); - // TODO make a Shape checking of output_tensor. 
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale() * 2)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class LogisticTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(LogisticTest, DataTypes); + +TYPED_TEST(LogisticTest, Simple) +{ + Check<TypeParam>( + {89}, {89}, + {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636, + -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000, + -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364, + -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727, + -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091, + -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455, + 
-1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818, + -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818, + 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455, + 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091, + 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727, + 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364, + 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000, + 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636, + 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000}, + {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198, + 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786, + 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065, + 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576, + 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562, + 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805, + 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241, + 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759, + 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195, + 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438, + 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424, + 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935, + 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214, + 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802, + 0.9998873271, 
0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021}); } -// TODO Uint8 -// Need to Implement GetDequantizedOutput Function. +TEST(LogisticTest, IvalidInputOutputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape = {1}; + std::vector<float> input_data{10}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LogisticTest, IvalidQuantParam_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape = {2}; + std::vector<float> input_data{-10, 10}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 255, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp index afecf9058..8d9760ff2 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp @@ -18,6 +18,7 @@ #include "kernels/Utils.h" +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> #include <tensorflow/lite/kernels/internal/reference/pooling.h> #include <stdexcept> @@ -29,13 +30,13 @@ namespace kernels { MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) - : KernelWithParams<Pool2DParams>({input}, {output}, params) + : KernelWithParams<Pool2DParams>({input}, {output}, params) { } void MaxPool2D::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); assert(input()->shape().num_dims() == 4); const Shape &input_shape = input()->shape(); const int32_t batches = input_shape.dim(0); @@ -43,21 +44,26 @@ void MaxPool2D::configure() const int32_t input_width = input_shape.dim(2); const int32_t depth = input_shape.dim(3); - const int32_t output_height = computeOutputSize(_params.padding, input_height, - _params.filter_height, _params.stride_height); + const int32_t output_height = + computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height); const int32_t output_width = - computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); + computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); _padding_height = - computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); + computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); 
_padding_width = - computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); + computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); output()->resize({batches, output_height, output_width, depth}); - if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + if (input()->element_type() == DataType::U8) { - assert(input()->scale() == output()->scale()); - assert(input()->zero_point() == output()->zero_point()); + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); } } @@ -71,6 +77,9 @@ void MaxPool2D::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalSInt16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -116,5 +125,26 @@ void MaxPool2D::evalQuantized() const getTensorShape(output()), getTensorData<uint8_t>(output())); } +void MaxPool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::MaxPool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), 
getTensorData<int16_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-interpreter/src/kernels/MaxPool2D.h index 7a59ff022..bb7666305 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.h +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalSInt16() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp index 390255d89..44f2a222f 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/MaxPool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,15 +27,24 @@ namespace using namespace testing; -TEST(MaxPool2DTest, Float) +class MaxPool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaxPool2DTest, Float) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ - 1, -1, 0, -2, 2, // - -7, -6, -5, -4, -3, // - 5, 4, 3, 6, 7, // + 1, -1, 0, -2, 2, // + -7, -6, -5, -4, -3, // + 5, 4, 3, 6, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -47,30 +57,28 @@ TEST(MaxPool2DTest, Float) MaxPool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 1, 2, // - 5, 6, 
// + 1, 2, // + 5, 6, // }; std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MaxPool2DTest, Uint8) +TEST_F(MaxPool2DTest, Uint8) { std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375); std::vector<float> input_data{ - 0, -6, 12, 4, // - -3, -2, 10, 7, // + 0, -6, 12, 4, // + -3, -2, 10, 7, // }; - Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); Pool2DParams params{}; params.padding = Padding::VALID; @@ -82,14 +90,48 @@ TEST(MaxPool2DTest, Uint8) MaxPool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.0, 6.0}; std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MaxPool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + 
std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{ + 1, -1, 0, -2, 2, // + -7, -6, -5, -4, -3, // + 5, 4, 3, 6, 7, // + }; + std::vector<float> ref_output_data{ + 1, 2, // + 5, 6, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + MaxPool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-interpreter/src/kernels/Maximum.cpp new file mode 100644 index 000000000..b102b5e27 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Maximum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Maximum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Maximum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMaximum<float>(); + break; + case DataType::U8: + evalMaximum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Maximum::evalMaximum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::max(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-interpreter/src/kernels/Maximum.h new file mode 100644 index 000000000..3c99e69c7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H +#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Maximum : public Kernel +{ +public: + Maximum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalMaximum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H diff --git a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp new file mode 100644 index 000000000..e4a505b03 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Maximum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MaximumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaximumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(MaximumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = + makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({1, 0, 2, 12, 255, 
23})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp index 2394e2c0e..8e65e0d6d 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.cpp +++ b/compiler/luci-interpreter/src/kernels/Mean.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/reduce.h> #include <stdexcept> @@ -28,7 +28,7 @@ namespace luci_interpreter namespace kernels { -static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params) +static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params) { params->axis_count = num_axes; for (int i = 0; i < num_axes; ++i) @@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams * } // Returns the number of axes that will be reduced. Removes duplicates. 
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims) +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) { int reduction_count = num_axes; for (int i = 0; i < num_axes; ++i) @@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n return reduction_count; } -static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes, +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, bool keep_dims) { int input_num_dims = input_shape.num_dims(); @@ -123,15 +123,22 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int } } -Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams ¶ms) - : KernelWithParams<ReducerParams>({input, axes}, {output}, params) +Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams ¶ms) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum}, + params) { } void Mean::configure() { - assert(input()->element_type() == output()->element_type()); - assert(axes()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + const Shape &input_shape = input()->shape(); int input_num_dims = input_shape.num_dims(); @@ -144,18 +151,28 @@ void Mean::configure() tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, ¶ms); - const bool need_temporaries = - !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && - ((params.axis[0] == 1 && params.axis[1] == 2) || - (params.axis[0] == 2 && params.axis[1] == 
1))); - if (need_temporaries) - { - _temp_index = - std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, ""); - _resolved_axes = - std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, ""); - _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(), - AffineQuantization{}, ""); + _need_temporaries = !( + _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && + ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1))); + if (_need_temporaries) + { + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); + temp_sum->resize(output()->shape()); + } + else + { + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + temp_index->set_allocatable(false); + resolved_axes->set_allocatable(false); + temp_sum->set_allocatable(false); } } @@ -169,6 +186,9 @@ void Mean::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalQuantizedS16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -184,6 +204,10 @@ void Mean::evalFloat() const tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, ¶ms); + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && ((params.axis[0] == 1 && params.axis[1] == 2) || @@ -194,12 +218,12 @@ void Mean::evalFloat() const } else { - tflite::reference_ops::Mean( - getTensorData<float>(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData<float>(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get())); + tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<float>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<float>(temp_sum)); } } @@ -213,6 +237,10 @@ void Mean::evalQuantized() const tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, ¶ms); + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && ((params.axis[0] == 1 && params.axis[1] == 2) || @@ -225,23 +253,92 @@ void Mean::evalQuantized() const } else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale()) { - tflite::reference_ops::Mean( - getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData<uint8_t>(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get())); + tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<uint8_t>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<int>(temp_sum)); } else { tflite::reference_ops::QuantizedMeanOrSum<>( - getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(), - getTensorShape(input()).DimsData(), input()->shape().num_dims(), - getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()), - /*compute_sum=*/false); + getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(), + getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, + _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + 
getTensorData<int>(temp_sum), + /*compute_sum=*/false); + } +} + +void Mean::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &output_shape = output()->shape(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + const int num_axes = axes()->shape().num_elements(); + + constexpr int32_t output_min = -std::numeric_limits<int16_t>::max(); + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + // Defer to specialized implementation for 4D Mean across axes 1 & 2. + if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 && + ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1))) + { + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + assert(output_shape.num_dims() == 4); + assert(output_shape.dim(0) == batches); + assert(output_shape.dim(1) == 1); + assert(output_shape.dim(2) == 1); + assert(output_shape.dim(3) == depth); + + const double real_multiplier = + static_cast<double>(input()->scale()) / static_cast<double>(output()->scale()); + + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + const int32_t num_elements_in_axes = input_height * input_width; + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t c = 0; c < depth; ++c) + { + int32_t acc = 0; + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)]; + } + } + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + // Divide by the number of elements rounding to the nearest 
integer. + scaled_acc = scaled_acc > 0 + ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes + : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes; + + scaled_acc = std::max(scaled_acc, output_min); + scaled_acc = std::min(scaled_acc, output_max); + + output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc; + } + } + } + else + { + throw std::runtime_error("Unsupported configuration."); } } diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h index 9cc793c72..ed07ae561 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.h +++ b/compiler/luci-interpreter/src/kernels/Mean.h @@ -30,7 +30,8 @@ namespace kernels class Mean : public KernelWithParams<ReducerParams> { public: - Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams ¶ms); + Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams ¶ms); const Tensor *input() const { return _inputs[0]; } const Tensor *axes() const { return _inputs[1]; } @@ -42,11 +43,10 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _temp_index; - std::unique_ptr<Tensor> _resolved_axes; - std::unique_ptr<Tensor> _temp_sum; + bool _need_temporaries = false; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp index f4e411ca4..d2c00935a 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Mean.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,139 +28,213 @@ namespace using namespace testing; -TEST(MeanTest, FloatKeepDims) +class MeanTest : public ::testing::Test +{ +protected: + void 
SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MeanTest, FloatKeepDims) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{0, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{10.5, 12.5, 14.5}; std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, FloatKeepDims4DMean) +TEST_F(MeanTest, FloatKeepDims4DMean) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{6, 7, 18, 19}; std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, FloatNotKeepDims) +TEST_F(MeanTest, FloatNotKeepDims) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{1, 0, -3, -3}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data); - Tensor axis_tensor = 
makeInputTensor<DataType::S32>({4}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = false; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{12, 13}; std::initializer_list<int32_t> ref_output_shape{2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, Uint8KeepDims) +TEST_F(MeanTest, Uint8KeepDims) { float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor{DataType::U8, {3, 2}, {{quant_param.first}, {quant_param.second}}, ""}; - Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data); + Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor axis_tensor = 
makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::U8, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.3, 0.35, 0.55}; std::initializer_list<int32_t> ref_output_shape{3, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, Uint8NotKeepDims) +TEST_F(MeanTest, Uint8NotKeepDims) { float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor{DataType::U8, {1, 3, 2}, {{quant_param.first}, {quant_param.second}}, ""}; - Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data); + Tensor 
input_tensor = makeInputTensor<DataType::U8>( + {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); ReducerParams params{}; params.keep_dims = false; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.4, 0.4}; std::initializer_list<int32_t> ref_output_shape{1, 2}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST_F(MeanTest, SInt16KeepDims4D) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + std::vector<int32_t> axes_data{1, 2}; + std::vector<float> ref_output_data{6, 7, 18, 19}; + + Tensor input_tensor = 
+ makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get()); + Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-interpreter/src/kernels/Minimum.cpp new file mode 100644 index 000000000..5d3dcde72 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Minimum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Minimum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Minimum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMinimum<float>(); + break; + case DataType::U8: + evalMinimum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Minimum::evalMinimum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::min(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-interpreter/src/kernels/Minimum.h new file mode 100644 index 000000000..5ff4035b4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Minimum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
#define LUCI_INTERPRETER_KERNELS_MINIMUM_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// Kernel computing the element-wise minimum of two input tensors; the output
// shape is the broadcast of the two input shapes (see Minimum.cpp).
class Minimum : public Kernel
{
public:
  Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);

  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Typed evaluation helper dispatched on the element type in execute().
  template <typename T> inline void evalMinimum() const;
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H

/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MinimumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MinimumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(MinimumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = + makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + 
makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp new file mode 100644 index 000000000..bae1eac70 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MirrorPad.h" + +#include "kernels/Utils.h" + +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms) + : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params) +{ +} + +void MirrorPad::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +template <typename T> +inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output); + +void MirrorPad::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output()); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); + + MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> +inline void MirrorPadImpl(const 
Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output) +{ + auto const input_dims = input.shape().num_dims(); + auto const input_data = input.data<T>(); + auto const paddings_data = paddings.data<int32_t>(); + auto const output_data = output.data<T>(); + + auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1; + auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1; + auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1; + auto const input_d = input.shape().dim(input_dims - 1); + + auto const input_h_offset = input_d * input_w; + auto const input_b_offset = input_h_offset * input_h; + + auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1; + auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1; + auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1; + auto const output_d = output.shape().dim(input_dims - 1); + + auto const left_b_pad = paddings_data[2 * (input_dims - 4)]; + auto const left_h_pad = paddings_data[2 * (input_dims - 3)]; + auto const left_w_pad = paddings_data[2 * (input_dims - 2)]; + auto const left_d_pad = paddings_data[2 * (input_dims - 1)]; + + auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1]; + auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1]; + auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1]; + auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1]; + + const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; }; + const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h, + auto b) { + return d + w * input_d + h * input_h_offset + b * input_b_offset; + }; + + const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) { + bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1; + return positive_mod(reflected ? 
input + left_pad - i - 1 : i - left_pad, input); + }; + + const T *in_ptr = input_data; + T *out_ptr = output_data; + + for (int32_t b = 0; b < output_b; ++b) + { + for (int32_t h = 0; h < output_h; ++h) + { + for (int32_t w = 0; w < output_w; ++w) + { + for (int32_t d = 0; d < output_d; ++d) + { + if (b < left_b_pad || b >= output_b - right_b_pad || // + h < left_h_pad || h >= output_h - right_h_pad || // + w < left_w_pad || w >= output_w - right_w_pad || // + d < left_d_pad || d >= output_d - right_d_pad) + { + if (mode == MirrorPadMode::REFLECT) + { + *out_ptr++ = input_data[offset_index( + positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w), + positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))]; + } + else + { + *out_ptr++ = input_data[offset_index( + symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w), + symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))]; + } + } + else + { + *out_ptr++ = *in_ptr++; + } + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-interpreter/src/kernels/MirrorPad.h new file mode 100644 index 000000000..d3e6e858a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H +#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class MirrorPad : public KernelWithParams<MirrorPadParams> +{ +public: + MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp new file mode 100644 index 000000000..740d8cb22 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MirrorPad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MirrorPadTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode) + { + MirrorPadParams params{}; + params.mode = mode; + + MirrorPad kernel(&input, &padding, &output, params); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MirrorPadTest, FloatReflect) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector<int> 
padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 3.0, 3.0, 4.0, 4.0, 3.0, // + 3.0, 3.0, 4.0, 4.0, 3.0}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric2Dim) +{ + Shape input_shape = {3, 1}; + Shape padding_shape = {2, 2}; + + std::vector<float> input_data{1.0f, 2.0f, 3.0f}; + std::vector<int> padding_data{1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0}; + std::initializer_list<int32_t> ref_output_shape{6, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Reflect) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 
6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{ + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Symmetric) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> 
ref_output_data{ + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, UnsupportedDim_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +TEST_F(MirrorPadTest, InvalidInputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp index dd31aa099..531fb4fa1 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.cpp @@ -17,9 +17,11 @@ #include "kernels/Mul.h" +#include "kernels/BinaryOpCommon.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALMul.h" + #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> #include <stdexcept> @@ 
-30,13 +32,22 @@ namespace kernels { Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms) - : KernelWithParams<MulParams>({input1, input2}, {output}, params) + : KernelWithParams<MulParams>({input1, input2}, {output}, params) { } void Mul::configure() { - assert(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type()); + if (input1()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1) + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); + } + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); } @@ -47,6 +58,15 @@ void Mul::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::S16: + evalQuantizedS16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -54,30 +74,77 @@ void Mul::execute() const void Mul::evalFloat() const { - float activation_min{}; - float activation_max{}; - calculateActivationRange(_params.activation, &activation_min, &activation_max); + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), 
getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Mul::evalInteger() const +{ tflite::ArithmeticParams params{}; - params.float_activation_min = activation_min; - params.float_activation_max = activation_max; + fillArithmeticActivationRange<T>(params, _params.activation); const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( - getTensorShape(input1()), getTensorShape(input2()), ¶ms); + getTensorShape(input1()), getTensorShape(input2()), ¶ms); if (need_broadcast) { - tflite::optimized_ops::BroadcastMul4DSlow( - params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), - getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); } else { - tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), - getTensorShape(input2()), getTensorData<float>(input2()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); } } +void Mul::evalQuantizedS16() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_multiplier = input1_scale * input2_scale / output_scale; + + int32_t output_multiplier; + int output_shift; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t 
activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val, + int16_t input2_val) { + int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val); + output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift); + output = std::max(output, activation_min); + output = std::min(output, activation_max); + return static_cast<int16_t>(output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()), + getTensorShape(input2()), getTensorData<int16_t>(input2()), + getTensorShape(output()), getTensorData<int16_t>(output()), fn); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h index e46160bcb..c0cf817df 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.h +++ b/compiler/luci-interpreter/src/kernels/Mul.h @@ -42,6 +42,8 @@ public: private: void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantizedS16() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp index f2255ac3f..fc0e60614 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Mul.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,26 +28,36 @@ namespace using namespace testing; -TEST(MulTest, Float) +class MulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MulTest, Float) { Shape base_shape = {2, 3, 1, 2}; 
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; std::vector<std::vector<float>> test_outputs = { - {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, - 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, - 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, - {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, - {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, - 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, - 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, - {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor input1_tensor = + 
makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); MulParams params{}; @@ -54,17 +65,19 @@ TEST(MulTest, Float) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) - << "With shape number " << i; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; } // Re-run with exchanged inputs. for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); MulParams params{}; @@ -72,12 +85,206 @@ TEST(MulTest, Float) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + dtype max_value = 
std::numeric_limits<dtype>::max(); + dtype res_max = max_value - max_value % 10; + + std::vector<std::vector<dtype>> test_outputs = { + {8, 0, 20, 0, 4, 30, // + 16, 0, 40, 3, 8, 0, // + 0, 0, 0, 6, 0, 0, // + 4, 0, 10, 9, 2, 0, // + 40, 0, 100, 0, 20, 150, // + 28, 0, 70, 0, 14, res_max}, + {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max}, + {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2, + 70, res_max}, + {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}}; + std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10}; + std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(MulTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<int32_t>> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<std::vector<float>> ref_outputs = { + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 
0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) - << "With shape number " << i; + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; } + // Re-run with exchanged inputs and different scales. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(MulTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, 
_memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(MulTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + MulParams params{}; + params.activation = Activation::NONE; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-interpreter/src/kernels/Neg.cpp new file mode 100644 index 000000000..c6fe08a9e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Neg.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Neg.h" +#include "kernels/Utils.h" + +#include "PALNeg.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Neg::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void Neg::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Neg::evalFloat() const +{ + luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Neg.h b/compiler/luci-interpreter/src/kernels/Neg.h new file mode 100644 index 000000000..69fa1a18e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Neg.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_NEG_H +#define LUCI_INTERPRETER_KERNELS_NEG_H + +#include "core/Kernel.h" +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Neg : public Kernel +{ +public: + Neg(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NEG_H diff --git a/compiler/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-interpreter/src/kernels/Neg.test.cpp new file mode 100644 index 000000000..8b2bc1a82 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Neg.test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Neg.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T> input_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Neg kernel(&input_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(NegTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, -1.0f, -3.0f, // Row 1 + -1.0f, 1.0f, 2.0f, // Row 2 + }); + + SUCCEED(); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp new file mode 100644 index 000000000..54e5eee34 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void NotEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void NotEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void NotEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), 
y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void NotEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void NotEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h new file mode 100644 index 000000000..d2aafe893 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class NotEqual : public Kernel +{ +public: + NotEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp new file mode 100644 index 000000000..45bf4022a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -0,0 +1,306 @@ +/* + * 
Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class NotEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(NotEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray({2, 3})); +} + +TEST_F(NotEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, true, true, // Row 2 + true, true, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + true, false, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(NotEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(NotEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(NotEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, true, // Row 1 + true, false, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, true, true, // Row 1 + true, true, false, true, // Row 2 + true, true, true, true, // Row 3 + false, false, false, false, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor 
= makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp new file mode 100644 index 000000000..4d3e5f2ef --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/OneHot.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/OneHot.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template <typename T> +void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor, + const Tensor *off_value_tensor, int32_t depth, int32_t axis, + Tensor *output_tensor) +{ + // define input shape and correct axis + auto const &input_shape = indices_tensor->shape(); + axis = axis == -1 ? input_shape.num_dims() : axis; + + // TODO support other integer input types + auto const *indices = getTensorData<int32_t>(indices_tensor); + auto const on_value = getTensorData<T>(on_value_tensor)[0]; + auto const off_value = getTensorData<T>(off_value_tensor)[0]; + auto *output = getTensorData<T>(output_tensor); + + // prefix_dim_size == # of elements before the axis + // depth == # of elements per axis + // suffix_dim_size == # of elements after the axis + auto prefix_dim_size = 1; + for (int32_t i = 0; i < axis; ++i) + { + prefix_dim_size *= input_shape.dim(i); + } + assert(prefix_dim_size > 0); + auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size; + + // View the indices as a matrix of size: + // prefix_dim_size x suffix_dim_size + // View the output as a matrix of size: + // prefix_dim_size x depth x suffix_dim_size + // Then the output is: + // output(i, j, k) == (indices(i, k) == j) ? on : off + for (int32_t i = 0; i < prefix_dim_size; ++i) + for (int32_t j = 0; j < depth; ++j) + for (int32_t k = 0; k < suffix_dim_size; ++k, ++output) + *output = indices[i * suffix_dim_size + k] == j ? 
on_value : off_value; +} + +} // namespace + +OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params) + : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params) +{ + // Do nothing +} + +void OneHot::configure() +{ + // check types + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type()); + + // check shape dependent parameters + LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims()); + + // define parameters that affect the output shape + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const &input_shape = indices()->shape(); + auto const input_dims = input_shape.num_dims(); + auto const axis = params().axis == -1 ? 
input_dims : params().axis; + + // define output shape + Shape output_shape(input_shape.num_dims() + 1); + { + for (int32_t d = 0; d < axis; ++d) + output_shape.dim(d) = input_shape.dim(d); + + output_shape.dim(axis) = depth_value; + + for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d) + output_shape.dim(d) = input_shape.dim(d - 1); + } + + // reshape output + output()->resize(output_shape); +} + +void OneHot::execute() const +{ + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const axis = params().axis; + + switch (output()->element_type()) + { + case loco::DataType::FLOAT32: + OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::U8: + OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::S16: + OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + default: + // TODO Support other data types + throw std::runtime_error("Not supported, yet!"); + break; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h new file mode 100644 index 000000000..572f857ae --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/OneHot.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H +#define LUCI_INTERPRETER_KERNELS_ONEHOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class OneHot : public KernelWithParams<OneHotParams> +{ +public: + OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params); + + const Tensor *indices() const { return _inputs[0]; } + const Tensor *depth() const { return _inputs[1]; } + const Tensor *on_value() const { return _inputs[2]; } + const Tensor *off_value() const { return _inputs[3]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp new file mode 100644 index 000000000..45b6968fa --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include "kernels/OneHot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T1, typename T2> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data, + std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data, + int32_t axis, std::initializer_list<T2> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr auto input_type = getElementType<T1>(); + constexpr auto output_type = getElementType<T2>(); + + Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + OneHotParams params{}; + params.axis = axis; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); + EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template <typename T> class OneHotTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int16_t>; +TYPED_TEST_SUITE(OneHotTest, DataTypes); + +TYPED_TEST(OneHotTest, BasicPattern) +{ + // axis 0 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 
3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/0, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 1, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 1, 0, // + 0, 1, 0, // + }); + // axis 1 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/1, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + + 0, 0, 1, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + }); + // axis -1 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/-1, + /*output_data=*/ + { + 1, 0, 0, 0, // + 0, 0, 0, 1, // + 0, 0, 0, 0, // + + 0, 0, 0, 0, // + 0, 0, 0, 1, // + 1, 0, 0, 0, // + }); +} + +TEST(OneHotTest, UnsupportedInputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + // input type should be integer + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get()); + + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + OneHotParams params = {-1}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, OutputTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, 
memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + + // type of on_value, off_value and output_tensor should be same + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + OneHotParams params = {-1}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, InvalidAxis_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + // axis should be in [-1, input_shape.rank] + OneHotParams params = {-2}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-interpreter/src/kernels/PRelu.cpp new file mode 100644 index 000000000..5a6b05c3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PRelu.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PRelu.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <tensorflow/lite/kernels/internal/reference/prelu.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +PRelu::~PRelu() +{ + // Destructor declared to delete vector of alpha quantized data properly +} + +void PRelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1); + LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1); + + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives + _alpha_multipliers.resize(1); + double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier, + &_alpha_multipliers[0].shift); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + else if (input()->element_type() == DataType::S16) + { + // Common check for correctness of quant params + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + 
for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel) + { + LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0); + } + // PRelu specific checks for CWQ + LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1); + LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) == + alpha()->shape().dim(alpha()->quantized_dimension())); + LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() == + input()->shape().dim(input()->shape().num_dims() - 1)); + + // all dimension of alpha except last one should be size 1 + for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim) + { + LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1); + } + + std::vector<double> real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale()); + + _alpha_multipliers = quantizeMultipliers(real_multipliers); + + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape())); +} + +void PRelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void PRelu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto alpha_data = getTensorData<float>(alpha()); + const auto size = getTensorShape(input()).FlatSize(); + auto output_data = getTensorData<float>(output()); + + auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? 
input : input * alpha; }; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()), + getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()), + PReluFunc); + } + else + { + for (auto i = decltype(size){0}; i < size; ++i) + { + if (input_data[i] >= 0) + output_data[i] = input_data[i]; + else + output_data[i] = input_data[i] * alpha_data[i]; + } + } +} + +void PRelu::evalQuantized() const +{ + tflite::PreluParams op_params{}; + + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'. + op_params.output_offset = output()->zero_point(); + op_params.output_shift_1 = _output_shift_identity; + op_params.output_multiplier_1 = _output_multiplier_identity; + op_params.output_shift_2 = _alpha_multipliers[0].shift; + op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastPrelu4DSlow( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()), + getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Prelu<uint8_t>( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()), + getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val, + const ChannelQuantMultipliers &identity_mult, + const ChannelQuantMultipliers &alpha_mult) +{ + constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min(); + constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max(); + + const int32_t output_val = + input_val >= 0 + ? 
tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val), + identity_mult.multiplier, identity_mult.shift) + : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val), + alpha_mult.multiplier, alpha_mult.shift); + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val)); + return clamped_output; +} + +void PRelu::evalQuantizedS16() const +{ + // Note that this kernel assumes alpha is CWQ + tflite::RuntimeShape input_shape = getTensorShape(input()); + const int16_t *input_data = input()->data<int16_t>(); + const int16_t *alpha_data = alpha()->data<int16_t>(); + int16_t *output_data = output()->data<int16_t>(); + + const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity}; + + const int last_dim = input()->shape().num_dims() - 1; + + int32_t outer_dims_size = 1; + for (int i = 0; i < last_dim; ++i) + outer_dims_size *= input_shape.Dims(i); + int32_t quant_dim_size = input_shape.Dims(last_dim); + + for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims) + for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel) + { + const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel]; + size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size); + offset += quant_channel; + + output_data[offset] = + evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-interpreter/src/kernels/PRelu.h new file mode 100644 index 000000000..f7735d418 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PRelu.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H +#define LUCI_INTERPRETER_KERNELS_PRELU_H + +#include "core/Kernel.h" +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class ChannelQuantMultipliers; + +class PRelu : public Kernel +{ +public: + PRelu(const Tensor *input, const Tensor *alpha, Tensor *output); + + ~PRelu(); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *alpha() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + std::vector<ChannelQuantMultipliers> _alpha_multipliers; + // TODO merge this into one ChannelQuantMultiplier object + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PRELU_H diff --git a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp new file mode 100644 index 000000000..6d97382de --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PRelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, + std::initializer_list<T> alpha_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(PReluTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // 
Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*alpha_data=*/ + { + 0.0f, 0.5f, 0.1f, // Row 1 + 0.0f, 0.5f, 0.1f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -0.2f, // Row 2 + }); + + SUCCEED(); +} + +TEST(PReluTest, FloatBroadcast) +{ + Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3}, + /*output_shape=*/{1, 2, 2, 3}, + /*input_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 2, Column 2 + }, + /*alpha_data=*/ + {0.0f, 1.0f, 2.0f}, + /*output_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 2, Column 2 + }); + + SUCCEED(); +} + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PReluTest, Uint8Simple) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; + + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + 
FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1})); + + SUCCEED(); +} + +TEST(PReluTest, Uint8Broadcast) +{ + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + std::vector<float> ref_quant_output_data{ + 128, 128, 128, // Row 1, Column 1 + 192, 192, 192, // Row 1, Column 2 + 128, 64, 192, // Row 2, Column 1 + 128, 112, 144 // Row 2, Column 2 + }; + float kQuantizedTolerance = 2 * (1. / 256); + const float kMin = -1; + const float kMax = 127.f / 128.f; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_quant_output_data)); +} + 
+TEST(PReluTest, SInt16_LWQ_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + // Rewrite this test in case layer-wise quantization for sint16 is supported + std::vector<float> input_data(6); // data is not important + std::vector<float> alpha_data(6); + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_Simple) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector<float> alpha_scales{0.05f, 0.025f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data(6); // data is not important + std::vector<float> 
alpha_data(6); + + std::vector<float> alpha_scales{0.25f, 0.05f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data(6); // data is not important + std::vector<float> alpha_data(6); + + std::vector<float> alpha_scales{0.25f}; + std::vector<int32_t> zerop{0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape1) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector<float> alpha_scales{0.05f, 0.025f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu 
kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape2) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + + std::vector<float> alpha_scales{1.f, 0.05f, 0.1f}; + std::vector<int32_t> zerop{0, 0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, Input_Output_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + 
Tensor output_tensor = makeOutputTensor(DataType::U8); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Alpha_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST(PReluTest, Input_Output_U8_CWQ_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> scales{1.f, 1.f}; + std::vector<int32_t> zerop{0, 0}; + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Output_S16_CWQ_NEG) +{ + std::unique_ptr<IMemoryManager> 
memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> scales{1.f, 1.f}; + std::vector<int32_t> zerop{0, 0}; + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Mixing_U8_S16_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp new file mode 100644 index 000000000..42aab330c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pack.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams ¶ms) + : KernelWithParams<PackParams>(std::move(inputs), {output}, params) +{ +} + +void Pack::configure() +{ + LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count)); + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims()); + + if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 && + t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 && + t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64) + { + throw std::runtime_error("Unsupported type."); + } + + for (uint32_t i = 1; i < _inputs.size(); ++i) + { + const Tensor *tensor = _inputs[i]; + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); + for (int d = 0; d < t0->shape().num_dims(); ++d) + { + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); + } + } + + Shape output_shape(dimension_size); + int i = 0; + for (int index = 0; index < dimension_size; ++index) + { + if (index == axis) + { + 
output_shape.dim(index) = params().values_count; + } + else + { + output_shape.dim(index) = t0->shape().dim(i++); + } + } + + if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 || + t0->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale()); + // Guarantee input/output quantization params match as we do not support + // packing quantized tensors. + for (int i = 0; i < params().values_count; i++) + { + LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale()); + } + } + + output()->resize(output_shape); +} + +void Pack::execute() const +{ + switch (_inputs[0]->element_type()) + { + case DataType::FLOAT32: + evalGeneric<float>(); + break; + case DataType::U8: + evalGeneric<uint8_t>(); + break; + case DataType::S8: + evalGeneric<int8_t>(); + break; + case DataType::S16: + evalGeneric<int16_t>(); + break; + case DataType::S32: + evalGeneric<int32_t>(); + break; + case DataType::S64: + evalGeneric<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Pack::evalGeneric() const +{ + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + + VectorOfTensors<T, true> inputs(_inputs); + tflite::PackParams params{}; + params.axis = axis; + params.inputs_count = _inputs.size(); + tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()), + getTensorData<T>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pack.h b/compiler/luci-interpreter/src/kernels/Pack.h new file mode 100644 index 000000000..4a2fcfd80 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pack.h @@ -0,0 +1,46 @@ 
+/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PACK_H +#define LUCI_INTERPRETER_KERNELS_PACK_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pack : public KernelWithParams<PackParams> +{ +public: + Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams ¶ms); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalGeneric() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PACK_H diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp new file mode 100644 index 000000000..d16320b78 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::vector<std::initializer_list<int32_t>> input_shapes, + std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas, + std::initializer_list<T> output_data, int32_t axis) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + std::vector<const Tensor *> inputs(input_datas.size()); + std::vector<Tensor> tmp_inputs; + for (int i = 0; i < input_datas.size(); i++) + { + if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value || + std::is_same<T, int64_t>::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else + { + assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested"); + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, "")); + 
memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + } + for (int i = 0; i < input_datas.size(); i++) + { + inputs[i] = &tmp_inputs[i]; + } + + Tensor output_tensor = makeOutputTensor(element_type); + if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128); + } + else if (std::is_same<T, int16_t>::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f, 0); + } + + PackParams params{}; + params.axis = axis; + params.values_count = input_datas.size(); + Pack kernel(inputs, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class PackTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>; +TYPED_TEST_SUITE(PackTest, DataTypes); + +TYPED_TEST(PackTest, ThreeInputs) +{ + Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{3, 2}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 4, 2, 5, 3, 6}, /*axis=*/0); + + SUCCEED(); +} + +TYPED_TEST(PackTest, NegAxis) +{ + Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{2, 3}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 2, 3, 4, 5, 6}, /*axis=*/-1); + + SUCCEED(); +} + +TEST(Pack, MismatchingInputValuesCount_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input1_data{1, 4}; + std::vector<float> input2_data{2, 5}; + std::vector<float> input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, 
memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 0; + params.values_count = 2; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); + } +} + +TEST(Pack, InvalidInputAxis_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input1_data{1, 4}; + std::vector<float> input2_data{2, 5}; + std::vector<float> input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 2; + params.values_count = 3; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); + } +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp index bdf3a2a95..c07f6e310 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.cpp @@ -18,7 +18,9 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/pad.h> + +#include <limits> namespace luci_interpreter { @@ -26,7 +28,7 @@ namespace kernels { Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output) - : Kernel({input, 
paddings}, {output}) + : Kernel({input, paddings}, {output}) { } @@ -93,6 +95,16 @@ void Pad::execute() const getTensorData<uint8_t>(output())); break; } + case DataType::S8: + { + assert(output()->zero_point() >= std::numeric_limits<int8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<int8_t>::max()); + const auto pad_value = static_cast<int8_t>(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<int8_t>(output())); + break; + } default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp index 15fcd0da3..dd3ce947c 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Pad.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,47 +31,76 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; } TEST(Pad, Uint8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; - Tensor input_tensor{DataType::U8, {1, 2, 3, 1}, {{quant_param.first}, {quant_param.second}}, ""}; - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 
quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); } +TEST(Pad, Int8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2}; + std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0}; + Tensor input_tensor = makeInputTensor<DataType::S8>( + {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0, + 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 
EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1})); +} + TEST(Pad, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6}; std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp new file mode 100644 index 000000000..197cdaa69 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/pad.h> + +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, + Tensor *output) + : Kernel({input, paddings, constant_values}, {output}) +{ +} + +void PadV2::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + assert(constant_values()->element_type() == output()->element_type()); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + // Constant values elements number should be 1. 
+ assert(constant_values()->shape().num_elements() == 1); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +void PadV2::execute() const +{ + const int num_dims = input()->shape().num_dims(); + + tflite::PadParams params{}; + params.left_padding_count = num_dims; + params.right_padding_count = num_dims; + + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = num_dims - 1; i >= 0; --i) + { + params.left_padding[i] = paddings_data[i * 2]; + params.right_padding[i] = paddings_data[i * 2 + 1]; + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + const auto pad_value = getTensorData<float>(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()), + &pad_value, getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); + const auto pad_value = getTensorData<uint8_t>(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-interpreter/src/kernels/PadV2.h new file mode 100644 index 000000000..48a31f584 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PadV2.h 
@@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H +#define LUCI_INTERPRETER_KERNELS_PAD_V2_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class PadV2 : public Kernel +{ +public: + PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + const Tensor *constant_values() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H diff --git a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp new file mode 100644 index 000000000..41efaff06 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PadV2, Uint8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; + std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; + std::vector<float> constant_values_data{0.5}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = makeInputTensor<DataType::U8>( + {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data = { + 0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, // + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5}; // + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); +} + +TEST(PadV2, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6}; + std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; + std::vector<float> constant_values_data{7}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = + makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-interpreter/src/kernels/Pow.cpp new file mode 100644 index 000000000..722c64024 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pow.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Pow::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Pow::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + eval<float>(); + break; + case DataType::S32: + eval<int32_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Pow::eval() const +{ + tflite::ArithmeticParams params{}; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()), 
+ getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pow.h b/compiler/luci-interpreter/src/kernels/Pow.h new file mode 100644 index 000000000..8ff865e40 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_POW_H +#define LUCI_INTERPRETER_KERNELS_POW_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pow : public Kernel +{ +public: + Pow(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void eval() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_POW_H diff --git a/compiler/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-interpreter/src/kernels/Pow.test.cpp new file mode 100644 index 000000000..0e858115d --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.test.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pow.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class PowTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(PowTest, SimplePow) +{ + std::initializer_list<int32_t> base_shape = {1, 1, 3, 2}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, FloatBroadcastPow) +{ + std::initializer_list<int32_t> input1_shape = {1, 3}; + std::initializer_list<int32_t> input2_shape = {3, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 0.3f, 0.4f}; + std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f, + 0.96888f, 0.6178f, 1.3953f, 0.9587f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(PowTest, IntPow) +{ + std::initializer_list<int32_t> base_shape = {1, 3}; + + std::vector<int32_t> input_data{2, 3, 4}; + std::vector<int32_t> test_outputs{4, 27, 256}; + + Tensor input1_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Input_Type_Mismatch_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = 
makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp new file mode 100644 index 000000000..0c8544a65 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Quantize.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Quantize.h" +#include "kernels/Utils.h" +#include "PALQuantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output) +{ + int32_t multiplier; + int shift; + + const double effective_output_scale = input->scale() / output->scale(); + quantizeMultiplier(effective_output_scale, &multiplier, &shift); + + const auto input_shape = getTensorShape(input); + const auto output_shape = getTensorShape(output); + const auto size = tflite::MatchingFlatSize(input_shape, output_shape); + + const auto input_data = getTensorData<input_dtype>(input); + + switch (output->element_type()) + { + case loco::DataType::S8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<int8_t>(output)); + break; + case loco::DataType::U8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<uint8_t>(output)); + break; + case loco::DataType::S16: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<int16_t>(output)); + break; + default: + throw std::runtime_error("Unsupported quantized type, yet!"); + } +} + +} // namespace + +Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Quantize::configure() +{ + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S8 || + output()->element_type() == loco::DataType::S16); + break; + } + case loco::DataType::S16: + case loco::DataType::S8: + case loco::DataType::U8: + { + 
LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 || + output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S16); + if (output()->element_type() == loco::DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == 0); + } + break; + } + default: + throw std::runtime_error("Unsupported type"); + } + + output()->resize(input()->shape()); +} + +void Quantize::execute() const +{ + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + tflite::QuantizationParams op_params; + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + const auto input_data = getTensorData<float>(input()); + + switch (output()->element_type()) + { + case loco::DataType::S8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), getTensorData<int8_t>(output())); + break; + } + case loco::DataType::U8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData<int16_t>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } + break; + } + case loco::DataType::S16: + { + call_requantize<int16_t>(input(), output()); + break; + } + case loco::DataType::S8: + { + call_requantize<int8_t>(input(), output()); + break; + } + case loco::DataType::U8: + { + call_requantize<uint8_t>(input(), output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h new file mode 100644 index 000000000..006c5366f --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/Quantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Quantize : public Kernel +{ +public: + Quantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp new file mode 100644 index 000000000..22e67fe3f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Quantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class QuantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(QuantizeTest, FloatUint8) +{ + std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt8) +{ + std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, 
/*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt16) +{ + std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64}; + + std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200, + 200, 400, 600, 12700, 12800}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, Int16Int16) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; + + Tensor input_tensor = makeInputTensor<DataType::S16>( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int8Int8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + 
std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Uint8Uint8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int16Int8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor<DataType::S16>( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, InvalidInputType_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + 
makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp new file mode 100644 index 000000000..d58cd1563 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ReduceMax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reduce.h> + +#include <stdexcept> +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +// Returns the number of axes that will be reduced. Removes duplicates. 
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; + } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms) + : 
KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params) +{ +} + +void ReduceMax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + LUCI_INTERPRETER_CHECK(num_axes <= 4); + + // We compute shapes of outputs in configure, assuming that outputs have + // static shape + // TODO Support dynamic shape + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); +} + +void ReduceMax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + // TODO Support quantized kernels + default: + throw std::runtime_error("Unsupported type."); + } +} + +void ReduceMax::evalFloat() const +{ + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + int num_resolved_axis = 0; + LUCI_INTERPRETER_CHECK( + tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes, + getTensorData<int>(resolved_axes), &num_resolved_axis)); + + float init_value = std::numeric_limits<float>::lowest(); + tflite::reference_ops::ReduceGeneric<float>( + getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<float>(output()), getTensorShape(output()).DimsData(), + output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims, + 
getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value, + [](const float current, const float in) -> float { return (in > current) ? in : current; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h new file mode 100644 index 000000000..25a66278a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H +#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class ReduceMax : public KernelWithParams<ReducerParams> +{ +public: + ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp new file mode 100644 index 000000000..ab688827b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ReduceMax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReduceMaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReduceMaxTest, FloatNotKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{23, 24}; + std::initializer_list<int32_t> ref_output_shape{2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ReduceMaxTest, FloatKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = 
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{20, 22, 24}; + std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.cpp new file mode 100644 index 000000000..f3fc7d3f1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceProd.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ReduceProd.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reduce.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +// Returns the number of axes that will be reduced. Removes duplicates. +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || 
axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; + } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +ReduceProd::ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params) +{ +} + +void ReduceProd::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + LUCI_INTERPRETER_CHECK(num_axes <= 4); + + // We compute shapes of outputs in configure, assuming that outputs have + // static shape + // TODO Support dynamic shape + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); +} + +void ReduceProd::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + // TODO Support quantized kernels + default: + throw std::runtime_error("Unsupported type."); + } +} + +void ReduceProd::evalFloat() const +{ + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + int num_resolved_axis = 0; + LUCI_INTERPRETER_CHECK( + tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes, + getTensorData<int>(resolved_axes), 
&num_resolved_axis)); + + float init_value = 1.0; + tflite::reference_ops::ReduceGeneric<float>( + getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<float>(output()), getTensorShape(output()).DimsData(), + output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value, + [](const float current, const float in) -> float { return current * in; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.h b/compiler/luci-interpreter/src/kernels/ReduceProd.h new file mode 100644 index 000000000..d2f58cc0a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceProd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H +#define LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class ReduceProd : public KernelWithParams<ReducerParams> +{ +public: + ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp new file mode 100644 index 000000000..fa46f394d --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ReduceProd.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReduceProdTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReduceProdTest, FloatNotKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.162341376e+11, 1.9619905536e+12}; + std::initializer_list<int32_t> ref_output_shape{2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ReduceProdTest, FloatKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> 
axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{7.74592e+06, 1.197504e+08, 6.6889152e+08}; + std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ReduceProdTest, Input_Output_Type_NEG) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ReduceProdTest, Invalid_Axes_Type_NEG) +{ + std::vector<float> 
input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int64_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-interpreter/src/kernels/Relu.cpp new file mode 100644 index 000000000..747ec6cc8 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/Utils.h" + +#include "PALRelu.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data); +} + +void Relu::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max()); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), 
getTensorData<uint8_t>(output())); +} + +void Relu::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + constexpr int32_t output_min = 0; + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + const int32_t num_elements = input()->shape().num_elements(); + + for (int32_t i = 0; i < num_elements; ++i) + { + const int32_t input_val = input_data[i]; + int32_t output_val = + tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift); + output_val = std::max(output_val, output_min); + output_val = std::min(output_val, output_max); + output_data[i] = static_cast<int16_t>(output_val); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu.h b/compiler/luci-interpreter/src/kernels/Relu.h new file mode 100644 index 000000000..b813f0cdf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU_H +#define LUCI_INTERPRETER_KERNELS_RELU_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu : public Kernel +{ +public: + Relu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU_H diff --git a/compiler/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-interpreter/src/kernels/Relu.test.cpp new file mode 100644 index 000000000..bd32e3cc9 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.test.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReluTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReluTest, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(ReluTest, Uint8Quantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 8; + const float f_max = (127.0 / 128.0) * 8; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, Uint8Requantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 8; + const float in_max = (127.0 / 128.0) * 8; + const float out_min = (0.0 / 256.0) * 8; + const float out_max = (255.0 / 256.0) * 8; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, SInt16) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + std::vector<float> ref_output_data{ + 0, 0, 2, 4, // + 3, 0, 7, 1, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(ReluTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu kernel(&input_tensor, &output_tensor); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ReluTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-interpreter/src/kernels/Relu6.cpp new file mode 100644 index 000000000..07205ed3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu6.h" +#include "kernels/Utils.h" + +#include "PALRelu6.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu6::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + if (input()->element_type() == DataType::U8) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu6::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu6::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data); +} + +void Relu6::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = + std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()), + params.output_offset + static_cast<int32>(roundf(6.f / output()->scale()))); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-interpreter/src/kernels/Relu6.h new file mode 100644 index 000000000..f5030b588 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H +#define LUCI_INTERPRETER_KERNELS_RELU6_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu6 : public Kernel +{ +public: + Relu6(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU6_H diff --git a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp new file mode 100644 index 000000000..af7b3f3db --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Relu6.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class Relu6Test : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Relu6Test, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 7.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 6.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(Relu6Test, Uint8Quantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 10; + const float f_max = (127.0 / 128.0) * 10; + const float tolerance = (f_max - f_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Uint8Requantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 10; + const float in_max = (127.0 / 128.0) * 10; + const float out_min = (0.0 / 256.0) * 0; + const float out_max = (255.0 / 256.0) * 6; + const float tolerance = (in_max - in_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu6 kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Relu6Test, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Reshape.cpp 
b/compiler/luci-interpreter/src/kernels/Reshape.cpp index d88b5392a..d3234e483 100644 --- a/compiler/luci-interpreter/src/kernels/Reshape.cpp +++ b/compiler/luci-interpreter/src/kernels/Reshape.cpp @@ -17,6 +17,8 @@ #include "kernels/Reshape.h" +#include "kernels/Utils.h" + #include <cassert> #include <cstring> @@ -28,12 +30,26 @@ namespace kernels static Shape extractShapeFromTensor(const Tensor *tensor) { - assert(tensor->element_type() == DataType::S32); Shape shape(tensor->shape().num_elements()); - const auto *shape_data = tensor->data<int32_t>(); - for (int i = 0; i < tensor->shape().num_elements(); ++i) + if (tensor->element_type() == DataType::S32) + { + const auto *shape_data = tensor->data<int32_t>(); + for (int i = 0; i < tensor->shape().num_elements(); ++i) + { + shape.dim(i) = shape_data[i]; + } + } + else if (tensor->element_type() == DataType::S64) + { + const auto *shape_data = tensor->data<int64_t>(); + for (int i = 0; i < tensor->shape().num_elements(); ++i) + { + shape.dim(i) = static_cast<int32_t>(shape_data[i]); + } + } + else { - shape.dim(i) = shape_data[i]; + LUCI_INTERPRETER_CHECK(false); } return shape; } @@ -65,7 +81,7 @@ static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shap } Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output) - : Kernel({input, shape}, {output}) + : Kernel({input, shape}, {output}) { } diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp index 7255b8132..7c0522ebe 100644 --- a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Reshape.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,42 +27,90 @@ namespace using namespace testing; +class ReshapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = 
std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + // TODO Test types other than FLOAT32. -TEST(ReshapeTest, Regular) +TEST_F(ReshapeTest, Regular) { Shape input_shape{1, 2, 2, 3}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape shape_shape{2}; std::vector<int32_t> shape_data{3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(input_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); } -TEST(ReshapeTest, UnknownDimension) +TEST_F(ReshapeTest, UnknownDimension) { Shape input_shape{2, 1, 2, 3}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape shape_shape{3}; std::vector<int32_t> shape_data{2, -1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); +} + +TEST_F(ReshapeTest, SupportS64) +{ + Shape input_shape{2, 1, 2, 3}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape shape_shape{3}; + std::vector<int64_t> shape_data{2, -1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S64>(shape_shape, shape_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(input_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); +} + +TEST_F(ReshapeTest, SupportS16_NEG) +{ + Shape input_shape{2, 1, 2, 3}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape shape_shape{3}; + std::vector<int16_t> shape_data{2, -1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S16>(shape_shape, shape_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp new file mode 100644 index 000000000..e2ddd6a7b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeBilinear.h" + +#include "kernels/Utils.h" + +#include "PALResizeBilinear.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output, + const ResizeBilinearParams ¶ms) + : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params) +{ +} + +void ResizeBilinear::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + if (params().half_pixel_centers && params().align_corners) + throw std::runtime_error("If half_pixel_centers is True, align_corners must be False."); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeBilinear::execute() const +{ + tflite::ResizeBilinearParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::ResizeBilinear( + op_params, getTensorShape(input()), 
getTensorData<float>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::ResizeBilinear( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h new file mode 100644 index 000000000..b7bdc2ab7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeBilinear : public KernelWithParams<ResizeBilinearParams> +{ +public: + ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeBilinearParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp new file mode 100644 index 000000000..933a1128c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeBilinear.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + // On TFlite example use Uint8 value it self, so this means quant param scale 1.0f and zero + // point 0. 
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeBilinearTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes); + +TYPED_TEST(ResizeBilinearTest, SimpleTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + 4, 8, 10, // + 8, 12, 14, // + 10, 14, 16, // + }, + false, false); + SUCCEED(); +} + +TEST(ResizeBilinearTest, HalfPixelCenterFloatTest) +{ + Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 1, 2, // + 3, 4, // + 1, 2, // + 3, 4 // + }, + {3, 3}, + { + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, HalfPixelCenterUint8Test) +{ + Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 12, 16 // + }, + {3, 3}, + { + 2, 4, 6, // + 6, 7, 9, // + 9, 10, 12, // + 4, 7, 10, // + 8, 10, 13, // + 12, 14, 16, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, 
InputShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeDimInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, InvalidParams_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = true; + params.half_pixel_centers = true; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp new file mode 100644 index 000000000..306cefbc2 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeNearestNeighbor.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> +#include "PALResizeNearestNeighbor.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size, + Tensor *output, + const ResizeNearestNeighborParams ¶ms) + : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params) +{ +} + +void ResizeNearestNeighbor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeNearestNeighbor::execute() const +{ + tflite::ResizeNearestNeighborParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output())); + break; + case DataType::U8: + luci_interpreter_pal::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h new file mode 100644 index 000000000..137d031cf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams> +{ +public: + ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeNearestNeighborParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp new file mode 100644 index 000000000..7ade02a6f --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeNearestNeighbor.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + 
memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? std::max(input_data) : 0.f); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeNearestNeighborTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes); + +TYPED_TEST(ResizeNearestNeighborTest, SimpleTest) +{ + 
Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12, // + 4, 4, 10, // + 4, 4, 10, // + 10, 10, 16, // + }, + false, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, // + 10, 16, 16, // + }, + true, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, // + 10, 16, 16, // + }, + false, true); +} + +TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Reverse.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.cpp index a46308412..1b6a5cc3b 100644 --- a/compiler/luci-interpreter/src/kernels/Reverse.cpp +++ b/compiler/luci-interpreter/src/kernels/ReverseV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "kernels/Reverse.h" +#include "kernels/ReverseV2.h" #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/reference_ops.h> @@ -24,12 +24,12 @@ namespace luci_interpreter namespace kernels { -Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output) - : Kernel({input, axes}, {output}) +ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output) + : Kernel({input, axes}, {output}) { } -void Reverse::configure() +void ReverseV2::configure() { assert(axes()->shape().num_dims() == 1); assert(input()->shape().num_dims() >= axes()->shape().num_elements()); @@ -57,7 +57,7 @@ void Reverse::configure() output()->resize(input()->shape()); } -void Reverse::execute() const +void ReverseV2::execute() const { int axis_value = getTensorData<int32_t>(axes())[0]; switch (output()->element_type()) @@ -69,8 +69,8 @@ void Reverse::execute() const break; case DataType::U8: tflite::reference_ops::Reverse<uint8_t>( - axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()), - getTensorShape(output()), getTensorData<uint8_t>(output())); + axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); break; default: throw std::runtime_error("Unsupported output type"); diff --git a/compiler/luci-interpreter/src/kernels/Reverse.h b/compiler/luci-interpreter/src/kernels/ReverseV2.h index 3489dae28..51211c703 100644 --- a/compiler/luci-interpreter/src/kernels/Reverse.h +++ b/compiler/luci-interpreter/src/kernels/ReverseV2.h @@ -24,10 +24,10 @@ namespace luci_interpreter namespace kernels { -class Reverse : public Kernel +class ReverseV2 : public Kernel { public: - Reverse(const Tensor *input, const Tensor *axes, Tensor *output); + ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output); const Tensor *input() const { return _inputs[0]; } const Tensor *axes() const { return _inputs[1]; } diff --git 
a/compiler/luci-interpreter/src/kernels/Reverse.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp index 5475a8bd3..c0025faca 100644 --- a/compiler/luci-interpreter/src/kernels/Reverse.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp @@ -15,8 +15,9 @@ * limitations under the License. */ -#include "kernels/Reverse.h" +#include "kernels/ReverseV2.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,15 +28,17 @@ namespace using namespace testing; -template <typename T> class ReverseTest : public ::testing::Test +template <typename T> class ReverseV2Test : public ::testing::Test { }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(ReverseTest, DataTypes); +TYPED_TEST_SUITE(ReverseV2Test, DataTypes); -TYPED_TEST(ReverseTest, MultiDimensions) +TYPED_TEST(ReverseV2Test, MultiDimensions) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + // TypeParam std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; @@ -47,13 +50,15 @@ TYPED_TEST(ReverseTest, MultiDimensions) 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}; std::vector<int32_t> output_shape{4, 3, 2}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); - Reverse kernel = Reverse(&input_tensor, &axis_tensor, &output_tensor); + ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); 
kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp index 69b55d2f2..3c6494232 100644 --- a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Rsqrt.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,39 +30,42 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Rsqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } TEST(RsqrtTest, SimpleRsqrt) { Check( - /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, - /*input_data=*/ - { - 5, 4, 8, 2, // - 6, 7.5, 9, 0.3, // - }, - /*output_data=*/ - { - 0.44721360, 0.5, 0.35355339, 0.70710678, // - 0.40824829, 0.36514837, 0.33333333, 1.8257419, // - }); + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 5, 4, 8, 2, // + 6, 7.5, 9, 0.3, // + }, + 
/*output_data=*/ + { + 0.44721360, 0.5, 0.35355339, 0.70710678, // + 0.40824829, 0.36514837, 0.33333333, 1.8257419, // + }); } TEST(RsqrtTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Rsqrt kernel(&input_tensor, &output_tensor); @@ -70,11 +74,14 @@ TEST(RsqrtTest, Input_Output_Type_NEG) TEST(RsqrtTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Rsqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp new file mode 100644 index 000000000..b124e242c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SVDF.h" +#include "kernels/Utils.h" +#include "PALSVDF.h" + +#include <tensorflow/lite/kernels/internal/quantization_util.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time, + const Tensor *bias, const Tensor *input_activation_state, Tensor *output, + Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2, + Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6, + const SVDFParams ¶ms) + : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state}, + {output, scratchpad_activation_state, scratchpad_1, scratchpad_2, + scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6}, + params) +{ + // Do nothing +} + +void SVDF::configure() +{ + const Shape &input_shape = input()->shape(); + const Shape &weight_features_shape = weight_feature()->shape(); + const Shape &weight_time_shape = weight_time()->shape(); + + // Validate Input Tensor: + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 || + input()->element_type() == loco::DataType::S8); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2); + + // Validate inputs and output types + if (input()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8); + LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 || + weight_time()->element_type() == loco::DataType::S8); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32); + + LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 || + input_activation_state()->element_type() == loco::DataType::S8); + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8); + + // 
Note: now tflite support only ReLU activation for integer SVDF + LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU); + } + else if (weight_feature()->element_type() == loco::DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + } + else if ((weight_feature()->element_type() == loco::DataType::U8 || + weight_feature()->element_type() == loco::DataType::S8) && + input()->element_type() == loco::DataType::FLOAT32) + { + // TODO:: support hybrid SVDF op + throw std::runtime_error("Hybrid type is not currently supported"); + } + else + { + throw std::runtime_error("Unsupported type."); + } + + // Check all the parameters of tensor match within themselves and match the + // input configuration. 
+ const int rank = params().svdf_rank; + const int batch_size = input_shape.dim(0); + const int num_filters = weight_features_shape.dim(0); + LUCI_INTERPRETER_CHECK(rank != 0); + LUCI_INTERPRETER_CHECK(num_filters % rank == 0); + + const int num_units = num_filters / rank; + const int memory_size = weight_time_shape.dim(1); + + // Validate Weight_Feature Input Tensor: + LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1)); + + // Validate Weight_Time Input Tensor: + LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters); + + // Validate Bias + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units); + + // Validate Input Activation State + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size); + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters); + + // Resize scratchpad_state to input_activation_state + auto scratchpad_activation_state = getOutputTensors()[1]; + scratchpad_activation_state->resize({batch_size, memory_size * num_filters}); + + // Resize output tensor + output()->resize({batch_size, num_units}); + + luci_interpreter_pal::SetupScratchpadTensor( + input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2], + getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6], + getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units); +} + +void SVDF::execute() const +{ + switch (weight_feature()->element_type()) + { + case loco::DataType::FLOAT32: + evalFloat(); + break; + case loco::DataType::S8: + { + if (input()->element_type() == loco::DataType::S8) + evalInteger(); + else + // TODO:: support hybrid SVDF op + throw std::runtime_error("Hybrid type is not 
currently supported"); + break; + } + default: + throw std::runtime_error("Unsupported type"); + } +} + +void SVDF::evalInteger() const +{ + const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() / + input_activation_state()->scale()); + const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() * + weight_time()->scale() / output()->scale()); + + int32_t effective_scale_1_a; + int effective_scale_1_b; + int32_t effective_scale_2_a; + int effective_scale_2_b; + + tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b); + tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b); + + TfLiteSVDFParams params_svdf{}; + params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + params_svdf.rank = params().svdf_rank; + params_svdf.activation = getTfLiteActivation(params().activation); + + auto scratchpad_activation_state = getOutputTensors()[1]; + // Note: it is expected that activation_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state); + std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0); + + auto scratchpad = getOutputTensors()[2]; + auto output_temp = getOutputTensors()[3]; + + int32_t input_zp = input()->zero_point(); + int32_t output_zp = output()->zero_point(); + luci_interpreter_pal::IntegerSVDF( + params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()), + getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()), + getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()), + getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad), + getTensorData<int32_t>(output_temp), effective_scale_1_a, 
effective_scale_1_b, + effective_scale_2_a, effective_scale_2_b, input_zp, output_zp); +} + +void SVDF::evalFloat() const +{ + TfLiteSVDFParams params_svdf{}; + params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + params_svdf.rank = params().svdf_rank; + params_svdf.activation = getTfLiteActivation(params().activation); + + auto scratchpad_activation_state = getOutputTensors()[1]; + // Note: it is expected that activation_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<float>(scratchpad_activation_state); + std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0); + + auto scratchpad_1 = getOutputTensors()[2]; + + luci_interpreter_pal::FloatSVDF( + params_svdf, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(weight_feature()), getTensorData<float>(weight_feature()), + getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()), + getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data, + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h new file mode 100644 index 000000000..335a6cd8f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H +#define LUCI_INTERPRETER_KERNELS_SVDF_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SVDF : public KernelWithParams<SVDFParams> +{ +public: + SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time, + const Tensor *bias, const Tensor *input_activation_state, Tensor *output, + Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2, + Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6, + const SVDFParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *weight_feature() const { return _inputs[1]; } + const Tensor *weight_time() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } + const Tensor *input_activation_state() const { return _inputs[4]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalInteger() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SVDF_H diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp new file mode 100644 index 000000000..82bd9b009 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SVDF.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class SVDFTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(SVDFTest, FullIntegerTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape bias_shape{units}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083, + 0.17660543, 0.52949083, -0.77931279}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 
0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1); + std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1); + std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512); + std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16); + + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::S8>( + weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second, + weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = makeInputTensor<DataType::S16>( + weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second, + weight_time_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor( + DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + 
Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::RELU; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0}; + + std::vector<int32_t> ref_output_shape{batches, units}; + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, FloatTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape 
activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465, + 0.35867718, 0.36897406, 0.73463392}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; 
+ params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883, + -0.03004015, 0.09565311, 0.1587342, 0.00784263}; + + std::vector<float> ref_output_shape{batches, units}; + const float tolerance = 1e-5; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, Unsupported_Type_Configure_NEG) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 
0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SVDFTest, Invalid_Input_Shape_NEG) +{ + 
const int32_t batches = 2; + const int32_t right_input_size = 3; + const int32_t wrong_input_size = 4; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, wrong_input_size}; + Shape weight_feature_shape{num_filters, right_input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor 
scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Select.cpp b/compiler/luci-interpreter/src/kernels/Select.cpp new file mode 100644 index 000000000..b4ab5f621 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Select.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Select.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +// TODO use select.h when version up +// #include <tensorflow/lite/kernels/internal/reference/select.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Select::Select(const Tensor *condition, const Tensor *t, const Tensor *e, Tensor *output) + : Kernel({condition, t, e}, {output}) +{ + // NOTE _requires_broadcast is for SelectV2 + _requires_broadcast = false; + _has_low_rank_input_condition = false; +} + +void Select::configure() +{ + LUCI_INTERPRETER_CHECK(condition()->element_type() == DataType::BOOL); + LUCI_INTERPRETER_CHECK(t()->element_type() == e()->element_type()); + LUCI_INTERPRETER_CHECK(t()->element_type() == output()->element_type()); + + auto cond_shape = condition()->shape(); + auto cond_num_dims = cond_shape.num_dims(); + auto t_shape = t()->shape(); + + bool is_input_condition_scalar = cond_num_dims == 0; + bool has_rank_one_input_condition = cond_num_dims == 1 && cond_shape.dim(0) == t_shape.dim(0); + + _has_low_rank_input_condition = is_input_condition_scalar || has_rank_one_input_condition; + + output()->resize(calculateShapeForBroadcast(t()->shape(), e()->shape())); +} + +void Select::execute() const +{ + switch (t()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Select: unsupported type."); + } +} + +void Select::evalFloat() const +{ + const auto condition_shape = getTensorShape(condition()); + const auto condition_data = getTensorData<bool>(condition()); + const auto t_shape = getTensorShape(t()); + const auto t_data = getTensorData<float>(t()); + const auto e_shape = getTensorShape(e()); + const auto e_data = getTensorData<float>(e()); + const auto output_shape = getTensorShape(output()); + auto output_data = getTensorData<float>(output()); + + if (_has_low_rank_input_condition) + { + 
tflite::reference_ops::RankOneSelect(condition_shape, condition_data, t_shape, t_data, e_shape, + e_data, output_shape, output_data); + } + else if (_requires_broadcast) + { + // TODO support broadcast kernel when upgrade to TF2.10.x or above + assert(false); + } + else + { + tflite::reference_ops::Select(condition_shape, condition_data, t_shape, t_data, e_shape, e_data, + output_shape, output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Select.h b/compiler/luci-interpreter/src/kernels/Select.h new file mode 100644 index 000000000..d67b4f5fc --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Select.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SELECT_H +#define LUCI_INTERPRETER_KERNELS_SELECT_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Select : public Kernel +{ +public: + Select(const Tensor *cond, const Tensor *t, const Tensor *e, Tensor *output); + + const Tensor *condition() const { return _inputs[0]; } + const Tensor *t() const { return _inputs[1]; } + const Tensor *e() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + +private: + // for SelectV2 + bool _requires_broadcast = false; + // True if input condition is scalar or input condition has rank one and + // matches the first dimension of other inputs. + bool _has_low_rank_input_condition = false; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SELECT_H diff --git a/compiler/luci-interpreter/src/kernels/Select.test.cpp b/compiler/luci-interpreter/src/kernels/Select.test.cpp new file mode 100644 index 000000000..f74d18dc4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Select.test.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Select.h" +#include "kernels/TestUtils.h" + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class SelectTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +std::vector<unsigned char> c_data{ + 1, 1, 1, // Row 1 + 0, 0, 0, // Row 2 +}; + +std::vector<float> t_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 +}; + +std::vector<float> e_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 +}; + +std::vector<float> ref_output_data{ + 0.5, 0.7, 0.9, // Row 1 + -1, 0, 1, // Row 2 +}; + +TEST_F(SelectTest, FloatSimple) +{ + Tensor c_tensor = makeInputTensor<DataType::BOOL>({2, 3}, c_data, _memory_manager.get()); + Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get()); + Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Select kernel(&c_tensor, &t_tensor, &e_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(SelectTest, Invalid_C_Type_NEG) +{ + std::vector<float> i_c_data{ + 1, 1, 1, // Row 1 + 0, 0, 0, // Row 2 + }; + + Tensor c_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, i_c_data, _memory_manager.get()); + Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get()); + Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Select kernel(&c_tensor, 
&t_tensor, &e_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SelectTest, Invalid_O_Type_NEG) +{ + Tensor c_tensor = makeInputTensor<DataType::BOOL>({2, 3}, c_data, _memory_manager.get()); + Tensor t_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, t_data, _memory_manager.get()); + Tensor e_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, e_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Select kernel(&c_tensor, &t_tensor, &e_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp new file mode 100644 index 000000000..0429fe1e5 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams ¶ms) + : KernelWithParams<ShapeParams>({input}, {output}, params) +{ +} + +void ShapeKernel::configure() +{ + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or + output()->element_type() == DataType::S64); + const auto input_shape = input()->shape(); + + Shape output_shape(1); + output_shape.dim(0) = input_shape.num_dims(); + + output()->resize(output_shape); +} + +void ShapeKernel::execute() const +{ + switch (params().out_type) + { + case DataType::S32: + evalInt<int32_t>(); + break; + case DataType::S64: + evalInt<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void ShapeKernel::evalInt() const +{ + const auto input_shape = input()->shape(); + + auto output_data = getTensorData<T>(output()); + + for (int i = 0; i < input_shape.num_dims(); ++i) + { + output_data[i] = input_shape.dim(i); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h new file mode 100644 index 000000000..cfaadec91 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H +#define LUCI_INTERPRETER_KERNELS_SHAPE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ShapeKernel : public KernelWithParams<ShapeParams> +{ +public: + ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalInt() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp new file mode 100644 index 000000000..4763e016c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ShapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager) +{ + Shape input_shape{1, 3, 1, 3, 5}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(dataType); + + ShapeParams params{}; + params.out_type = dataType; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{1, 3, 1, 3, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{5}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ShapeTest, OutTypeInt) +{ + + // Run for int32_t output + runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get()); + // Run for int64_t output + runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get()); + + SUCCEED(); +} + +TEST_F(ShapeTest, Invalid_Output_Type_NEG) +{ + Shape input_shape{1, 3}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + ShapeParams params{}; + params.out_type = loco::DataType::FLOAT32; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp 
b/compiler/luci-interpreter/src/kernels/Slice.cpp index c4bc3c57c..2fe2c5471 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.cpp @@ -16,7 +16,7 @@ #include "kernels/Slice.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSlice.h" #include <cassert> #include <cstring> @@ -29,7 +29,7 @@ namespace kernels const int max_dim = 4; Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output) - : Kernel({input, begin, size}, {output}) + : Kernel({input, begin, size}, {output}) { } @@ -131,14 +131,18 @@ void Slice::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::Slice(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::Slice(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + case DataType::S8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output())); break; default: throw std::runtime_error("Unsupported input type."); diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp index a360a29cc..517982990 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Slice.h" #include 
"kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,11 +31,13 @@ template <typename T> class SliceTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SliceTest, DataTypes); +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; +TYPED_TEST_SUITE(SliceTest, DataTypes); TYPED_TEST(SliceTest, SimpleTest) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; Shape input_shape{3, 2, 3, 1}; std::vector<int32_t> begin_data{1, 0, 0, 0}; @@ -44,14 +47,17 @@ TYPED_TEST(SliceTest, SimpleTest) std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5}; std::vector<int32_t> output_shape{2, 1, 3, 1}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); - Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp index 2fb7f3f2c..c230aaa70 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp @@ -19,6 +19,7 @@ #include 
"kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/softmax.h> +#include "PALSoftmax.h" #include <stdexcept> @@ -29,13 +30,23 @@ namespace kernels { Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶ms) - : KernelWithParams<SoftmaxParams>({input}, {output}, params) + : KernelWithParams<SoftmaxParams>({input}, {output}, params) { } void Softmax::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1); + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 || + output()->zero_point() == std::numeric_limits<int8_t>::min()); + tflite::SoftmaxParams op_params{}; + op_params.table = _table; + luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); + } output()->resize(input()->shape()); } @@ -46,6 +57,12 @@ void Softmax::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S8: + evalQuantized<int8_t>(); + break; + case DataType::U8: + evalQuantized<uint8_t>(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -53,12 +70,23 @@ void Softmax::execute() const void Softmax::evalFloat() const { - tflite::SoftmaxParams params{}; - params.beta = _params.beta; + tflite::SoftmaxParams op_params{}; + op_params.beta = params().beta; - tflite::reference_ops::Softmax(params, getTensorShape(input()), getTensorData<float>(input()), + tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), getTensorData<float>(output())); } +template <typename T> void Softmax::evalQuantized() const +{ + tflite::SoftmaxParams op_params{}; + op_params.table = 
const_cast<float *>(_table); + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta); + luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()), + getTensorShape(output()), getTensorData<T>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-interpreter/src/kernels/Softmax.h index 2e4eda492..1f281df1c 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.h +++ b/compiler/luci-interpreter/src/kernels/Softmax.h @@ -38,6 +38,9 @@ public: private: void evalFloat() const; + template <typename T> void evalQuantized() const; + + float _table[256]; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp index 2193c3e83..08e70672d 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Softmax.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,33 +27,89 @@ namespace using namespace testing; -TEST(SoftmaxTest, Float) +template <typename T> constexpr loco::DataType toLocoDataType(); + +template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; } + +template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; } + +template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; } + +template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Shape 
input_shape{2, 1, 2, 3}; - std::vector<float> input_data{ - 5, -9, 8, // - -7, 2, -4, // - 1, -2, 9, // - 3, -6, -1, // - }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(toLocoDataType<T>()); SoftmaxParams params{}; params.beta = 0.1; Softmax kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - std::vector<float> ref_output_data{ - 0.38514, 0.09497, 0.51989, // - 0.20792, 0.51141, 0.28067, // - 0.25212, 0.18678, 0.56110, // - 0.48149, 0.19576, 0.32275, // - }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f), + std::max<float>(std::max<float>(input_data), 0.f)); + std::pair<float, int32_t> output_quant_param = + quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f), + std::max<float>(std::max<float>(output_data), 0.f)); + Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first, + 
input_quant_param.second, input_data, + memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class SoftmaxTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; +TYPED_TEST_SUITE(SoftmaxTest, DataTypes); + +TYPED_TEST(SoftmaxTest, Simple) +{ + Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3}, + { + 5, -9, 8, // + -7, 2, -4, // + 1, -2, 9, // + 3, -6, -1, // + }, + { + 0.38514, 0.09497, 0.51989, // + 0.20792, 0.51141, 0.28067, // + 0.25212, 0.18678, 0.56110, // + 0.48149, 0.19576, 0.32275, // + }); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp new file mode 100644 index 000000000..630cd38c4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/Utils.h" + +#include "PALSpaceToBatchND.h" + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +const int kInputMinDimensionNum = 3; +const int kInputMaxDimensionNum = 4; + +} // namespace + +SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape, + const Tensor *paddings, Tensor *output) + : Kernel({input, block_shape, paddings}, {output}) +{ +} + +void SpaceToBatchND::configure() +{ + const auto *block_shape_data = block_shape()->data<int32_t>(); + const auto *paddings_data = paddings()->data<int32_t>(); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int spatial_dims_num = input()->shape().num_dims() - 2; + + LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num); + + LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2); + + Shape output_shape = Shape(input()->shape().num_dims()); + int output_batch_size = input()->shape().dim(0); + for (int i = 0; i < spatial_dims_num; ++i) + { + int final_dim_size = + (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]); + LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0); + output_shape.dim(i + 1) = final_dim_size / block_shape_data[i]; + output_batch_size = output_batch_size * block_shape_data[i]; + } + output_shape.dim(0) = output_batch_size; + output_shape.dim(input()->shape().num_dims() - 1) = + 
input()->shape().dim(input()->shape().num_dims() - 1); + output()->resize(output_shape); +} + +void SpaceToBatchND::execute() const +{ + switch (input()->element_type()) + { + tflite::SpaceToBatchParams op_params; + case DataType::FLOAT32: + op_params.output_offset = 0; + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), + getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + op_params.output_offset = output()->zero_point(); + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), + getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h new file mode 100644 index 000000000..0893003bb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H +#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SpaceToBatchND : public Kernel +{ +public: + SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings, + Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *block_shape() const { return _inputs[1]; } + const Tensor *paddings() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp new file mode 100644 index 000000000..3a8b0a812 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> block_shape_shape, + std::initializer_list<int32_t> paddings_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> block_shape_data, + std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <> +void Check<uint8_t>( + std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape, + std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data, + std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = 
std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <typename T> class SpaceToBatchNDTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes); + +TYPED_TEST(SpaceToBatchNDTest, Simple) +{ + Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2}, + /*paddings_shape=*/{2, 2}, + /*output_shape=*/{6, 2, 2, 1}, + /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0}, + /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0}, + /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7, + 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0}); +} + +TEST(SpaceToBatchNDTest, Invalid_Shape_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, 
{2, 2}, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp index 6a5bd7cf8..7c29e8cb0 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp @@ -16,7 +16,7 @@ #include "SpaceToDepth.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSpaceToDepth.h" namespace luci_interpreter { @@ -24,7 +24,7 @@ namespace kernels { SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams ¶ms) - : KernelWithParams<SpaceToDepthParams>({input}, {output}, params) + : KernelWithParams<SpaceToDepthParams>({input}, {output}, params) { } @@ -61,14 +61,14 @@ void SpaceToDepth::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); break; default: throw 
std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp index e4a0fd642..4af488618 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp @@ -16,6 +16,7 @@ #include "kernels/SpaceToDepth.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,23 +32,27 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SpaceToDepthTest, DataTypes); +TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes); TYPED_TEST(SpaceToDepthTest, SimpleCase) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<TypeParam>(); std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8}; Shape input_shape{1, 2, 2, 2}; - Tensor input_tensor{getElementType<TypeParam>(), input_shape, {{}, {}}, ""}; - input_tensor.writeData(input_data.data(), input_data.size() * sizeof(TypeParam)); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8}; std::vector<int32_t> output_shape{1, 1, 1, 8}; - Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); + Tensor output_tensor = makeOutputTensor(element_type); SpaceToDepthParams params{}; params.block_size = 2; SpaceToDepth kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-interpreter/src/kernels/Split.cpp index 325b1c22f..1a563f307 100644 --- a/compiler/luci-interpreter/src/kernels/Split.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Split.cpp @@ -18,7 +18,7 @@ #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSplit.h" namespace luci_interpreter { @@ -26,7 +26,7 @@ namespace kernels { Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs) - : Kernel({axis, input}, std::move(outputs)) + : Kernel({axis, input}, std::move(outputs)) { } @@ -56,11 +56,11 @@ void Split::execute() const params.num_split = _outputs.size(); params.axis = _axis_value; -#define TF_LITE_SPLIT(scalar) \ - { \ - VectorOfTensors<scalar, false> all_outputs(_outputs); \ - tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ - all_outputs.shapes(), all_outputs.data()); \ +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ } switch (input()->element_type()) diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp index 11d0b1ea9..283cd9aa9 100644 --- a/compiler/luci-interpreter/src/kernels/Split.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Split.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,11 +31,14 @@ using namespace testing; template <typename T> void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, - std::vector<std::vector<T>> output_data, DataType element_type) + std::vector<std::vector<T>> output_data) { - Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}); - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), 
input_data.size() * sizeof(T)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<T>(); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<Tensor> output_tensors; output_tensors.reserve(num_splits); @@ -51,6 +55,10 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs)); kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } kernel.execute(); for (int i = 0; i < num_splits; ++i) @@ -65,60 +73,55 @@ template <typename T> class SplitTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SplitTest, DataTypes); +TYPED_TEST_SUITE(SplitTest, DataTypes); TYPED_TEST(SplitTest, FourDimensional) { Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { - {1, 2, 3, 4, 5, 6, 7, 8}, // - {9, 10, 11, 12, 13, 14, 15, 16}, // - }, - getElementType<TypeParam>()); + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, // + }); Check<TypeParam>( - /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 3, 4, 9, 10, 11, 12}, // - {5, 6, 7, 8, 13, 14, 15, 16}, // - }, - getElementType<TypeParam>()); + /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 9, 10, 11, 12}, // + {5, 6, 7, 8, 13, 14, 15, 16}, // + }); Check<TypeParam>( - /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 5, 6, 9, 10, 13, 14}, // - {3, 4, 
7, 8, 11, 12, 15, 16}, // - }, - getElementType<TypeParam>()); + /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 5, 6, 9, 10, 13, 14}, // + {3, 4, 7, 8, 11, 12, 15, 16}, // + }); Check<TypeParam>( - /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 3, 5, 7, 9, 11, 13, 15}, // - {2, 4, 6, 8, 10, 12, 14, 16}, // - }, - getElementType<TypeParam>()); + /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 3, 5, 7, 9, 11, 13, 15}, // + {2, 4, 6, 8, 10, 12, 14, 16}, // + }); } TYPED_TEST(SplitTest, OneDimensional) { Check<TypeParam>( - /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, - {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}, getElementType<TypeParam>()); + /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); } TYPED_TEST(SplitTest, NegativeAxis) { Check<TypeParam>( - /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 3, 4, 5, 6, 7, 8}, // - {9, 10, 11, 12, 13, 14, 15, 16}, - }, - getElementType<TypeParam>()); + /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, + }); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp new file mode 100644 index 000000000..aa6820889 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SplitV.h" + +#include "Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs) + : Kernel({input, size_splits, axis}, std::move(outputs)) +{ +} + +void SplitV::configure() +{ + assert(axis()->shape().num_elements() == 1); + _axis_value = getTensorData<int32_t>(axis())[0]; + if (_axis_value < 0) + _axis_value += input()->shape().num_dims(); + assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims()); + + auto num_split = static_cast<int32_t>(_outputs.size()); + auto sizes_data = getTensorData<int32_t>(size_splits()); + + assert(size_splits()->shape().num_dims() == 1); + + int32_t sum = 0; + const auto num_dims_size_spits = size_splits()->shape().dim(0); + int32_t count_neg_dim = 0; + + for (int32_t i = 0; i < num_dims_size_spits - 1; ++i) + { + if (sizes_data[i] != -1) + { + sum += sizes_data[i]; + } + else + { + count_neg_dim++; + } + } + assert(count_neg_dim < 2); + assert(size_splits()->shape().num_elements() == num_split); + + auto output_shape = input()->shape(); + for (int32_t i = 0; i < num_split; ++i) + { + if (sizes_data[i] == -1) + { + output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum; + } + else + { + output_shape.dim(_axis_value) = sizes_data[i]; + } + _outputs[i]->resize(output_shape); + } +} + +void SplitV::execute() const +{ + tflite::SplitParams params{}; + params.num_split = 
_outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + case DataType::S16: + TF_LITE_SPLIT(int16_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-interpreter/src/kernels/SplitV.h new file mode 100644 index 000000000..92f6288fb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SplitV : public Kernel +{ +public: + SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size_splits() const { return _inputs[1]; } + const Tensor *axis() const { return _inputs[2]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp new file mode 100644 index 000000000..035bc2122 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SplitV.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(int axis, std::initializer_list<int32_t> splits_size, + std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data, + std::vector<std::vector<T>> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + + auto num_splits = static_cast<int32_t>(splits_size.size()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor sizes_tensor = + makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + + std::vector<Tensor> output_tensors; + output_tensors.reserve(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensors.emplace_back(makeOutputTensor(element_type)); + } + + std::vector<Tensor *> output_tensor_ptrs(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs)); + kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_splits; ++i) + { + auto tmp = extractTensorData<T>(output_tensors[i]); + EXPECT_THAT(extractTensorData<T>(output_tensors[i]), + ::testing::ElementsAreArray(output_data[i])); + } +} + +template <typename T> class SplitVTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int16_t>; +TYPED_TEST_SUITE(SplitVTest, DataTypes); + +TYPED_TEST(SplitVTest, ThreeDimensional) +{ + Check<TypeParam>( + /*axis=*/0, 
/*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 4, 5, 6, 7, 8, 9}, // + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 10, 11, 12, 19, 20, 21}, // + {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 4, 7, 10, 13, 16, 19, 22, 25}, // + {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp index cdd208280..96835fbfc 100644 --- a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Sqrt.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,52 +30,58 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Sqrt 
kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } TEST(SqrtTest, SimpleSqrt) { Check( - /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, - /*input_data=*/ - { - 0, 8, 2, 4, // - 3, 7, 10, 0.3, // - }, - /*output_data=*/ - { - 0.0, 2.8284271, 1.4142136, 2, // - 1.7320508, 2.6457513, 3.1622777, 0.54772256, // - }); + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, 8, 2, 4, // + 3, 7, 10, 0.3, // + }, + /*output_data=*/ + { + 0.0, 2.8284271, 1.4142136, 2, // + 1.7320508, 2.6457513, 3.1622777, 0.54772256, // + }); } TEST(SqrtTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Sqrt kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(AddTest, Invalid_Input_Type_NEG) +TEST(SqrtTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Sqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Square.cpp 
b/compiler/luci-interpreter/src/kernels/Square.cpp new file mode 100644 index 000000000..bc71905c1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Square.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Square.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Square::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Square::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Square::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = (*i) * (*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Square.h b/compiler/luci-interpreter/src/kernels/Square.h new file mode 100644 index 000000000..73ed5a707 --- /dev/null +++ 
b/compiler/luci-interpreter/src/kernels/Square.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H +#define LUCI_INTERPRETER_KERNELS_SQUARE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Square : public Kernel +{ +public: + Square(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H diff --git a/compiler/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-interpreter/src/kernels/Square.test.cpp new file mode 100644 index 000000000..51662dea7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Square.test.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Square.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquareTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Square kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp new file mode 100644 index 000000000..3bafeba4a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void SquaredDifference::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void SquaredDifference::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalSquaredDifference<float>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void SquaredDifference::evalSquaredDifference() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) { + const T difference = x - y; + return difference * difference; + }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-interpreter/src/kernels/SquaredDifference.h new file mode 100644 index 000000000..9327caf93 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H +#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SquaredDifference : public Kernel +{ +public: + SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalSquaredDifference() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp new file mode 100644 index 000000000..2819c01e2 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquaredDifferenceTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(SquaredDifferenceTest, FloatBroadcast) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape1{3, 1, 2}; + Shape input_shape2{1}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{1.0}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get()); + Tensor 
input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.cpp index ce43ef789..4a75518c7 100644 --- a/compiler/luci-interpreter/src/kernels/Squeeze.cpp +++ b/compiler/luci-interpreter/src/kernels/Squeeze.cpp @@ -27,7 +27,7 @@ namespace kernels { Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms) - : KernelWithParams<SqueezeParams>({input}, {output}, params) + : KernelWithParams<SqueezeParams>({input}, {output}, params) { } diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp index 3a34284dd..1bc0b6459 100644 --- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Squeeze.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,20 +30,21 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::initializer_list<T> output_data, - DataType element_type, std::vector<int32_t> squeeze_dims) + std::initializer_list<int32_t> squeeze_dims) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * 
sizeof(T)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); SqueezeParams params{}; - for (size_t i = 0; i < squeeze_dims.size(); i++) - { - params.squeeze_dims.push_back(squeeze_dims.at(i)); - } + params.squeeze_dims = squeeze_dims; Squeeze kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -54,17 +56,17 @@ template <typename T> class SqueezeTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(SqueezeTest, DataTypes); +TYPED_TEST_SUITE(SqueezeTest, DataTypes); TYPED_TEST(SqueezeTest, TotalTest) { Check<TypeParam>( - /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24}, - /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, - /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, - getElementType<TypeParam>(), {-1, 0}); + /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + {-1, 0}); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp index 679485439..a8730d861 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include 
<tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/strided_slice.h> #include <stdexcept> @@ -31,7 +31,7 @@ namespace kernels StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides, Tensor *output, const StridedSliceParams ¶ms) - : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params) + : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params) { } @@ -82,7 +82,7 @@ void StridedSlice::configure() assert(stride != 0); int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx); int32_t end = - ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin); + ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin); const bool shrink_axis = params().shrink_axis_mask & (1 << idx); if (shrink_axis) @@ -136,6 +136,11 @@ void StridedSlice::execute() const getTensorData<uint8_t>(input()), getTensorShape(output()), getTensorData<uint8_t>(output())); break; + case DataType::S32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData<int32_t>(input()), getTensorShape(output()), + getTensorData<int32_t>(output())); + break; default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp index 5ab06e2ec..399cdebed 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp @@ -16,6 +16,7 @@ #include "kernels/StridedSlice.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -28,6 +29,8 @@ using namespace testing; TEST(StridedSliceTest, Float) { + std::unique_ptr<IMemoryManager> memory_manager = 
std::make_unique<TestMemoryManager>(); + Shape input_shape{2, 3, 2}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape begin_shape{3}; @@ -36,17 +39,15 @@ TEST(StridedSliceTest, Float) std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - Tensor begin_tensor{DataType::S32, begin_shape, {}, ""}; - Tensor end_tensor{DataType::S32, end_shape, {}, ""}; - Tensor strides_tensor{DataType::S32, strides_shape, {}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - input_tensor.writeData(input_data.data(), input_data.size() * sizeof(float)); - begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t)); - end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t)); - strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t)); - StridedSliceParams params{}; params.begin_mask = 0; params.end_mask = 0; @@ -57,37 +58,36 @@ TEST(StridedSliceTest, Float) StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> output_shape{3, 2}; std::vector<float> output_data{1, 2, 3, 4, 5, 6}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } TEST(StridedSliceTest, Uint8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{2, 3, 2}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - std::vector<uint8_t> quant_input_data = quantize<uint8_t>(input_data, 1.0f, 0); Shape begin_shape{3}; std::vector<int32_t> begin_data{0, 0, 0}; Shape end_shape{3}; std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor{DataType::U8, input_shape, {{1.0f}, {0}}, ""}; - Tensor begin_tensor{DataType::S32, begin_shape, {}, ""}; - Tensor end_tensor{DataType::S32, end_shape, {}, ""}; - Tensor strides_tensor{DataType::S32, strides_shape, {}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0); - input_tensor.writeData(quant_input_data.data(), quant_input_data.size() * sizeof(uint8_t)); - begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t)); - end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t)); - strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t)); - StridedSliceParams params{}; params.begin_mask = 0; params.end_mask = 0; @@ -98,13 +98,12 @@ TEST(StridedSliceTest, Uint8) StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); 
std::vector<int32_t> output_shape{3, 2}; std::vector<float> output_data{1, 2, 3, 4, 5, 6}; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp new file mode 100644 index 000000000..1fd583c62 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sub.h" +#include "kernels/Utils.h" + +#include "PALSub.h" + +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms) + : KernelWithParams<SubParams>({input1, input2}, {output}, params) +{ +} + +void Sub::configure() +{ + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type())) + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type())) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Sub::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sub::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Sub::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool 
need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Sub::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const int left_shift = 20; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.left_shift = left_shift; + // The kernel expects inputs' zero points to be negated. + params.input1_offset = -input1()->zero_point(); // Note the '-'. 
+ params.input1_multiplier = input1_multiplier; + params.input1_shift = input1_shift; + params.input2_offset = -input2()->zero_point(); // Note the '-'. + params.input2_multiplier = input2_multiplier; + params.input2_shift = input2_shift; + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastQuantSubSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h new file mode 100644 index 000000000..23952b3bd --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SUB_H +#define LUCI_INTERPRETER_KERNELS_SUB_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sub : public KernelWithParams<SubParams> +{ +public: + Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SUB_H diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp new file mode 100644 index 000000000..9abafd49a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sub.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +#include <algorithm> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; +using std::pair; +using std::vector; +using std::transform; +using std::initializer_list; + +class SubTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST_F(SubTest, Uint8) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> output_data = { + {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f, + 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f}, + {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f}, + {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f}, + {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}}; + + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); + for (size_t i = 0; i < output_data.size(); ++i) 
+ { + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + + // Inversion step for output_data, because subtract is not commutative operation + auto multiply = [](auto &i) { + transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; }); + }; + for_each(output_data.begin(), output_data.end(), multiply); + + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST_F(SubTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> test_outputs = { + {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f, + 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}}; + + vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < 
test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0, + 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0}, + {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0}, + {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0, + 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0}, + {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + 
kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(SubTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(SubTest, Mismatching_Input_Int_Types_NEG) +{ + Tensor input1_tensor = 
makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sum.cpp b/compiler/luci-interpreter/src/kernels/Sum.cpp new file mode 100644 index 000000000..645f02c36 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sum.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sum.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reduce.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +// Returns the number of axes that will be reduced. Removes duplicates. +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? 
axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; + } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +Sum::Sum(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params) +{ +} + +void Sum::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + 
LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + LUCI_INTERPRETER_CHECK(num_axes <= 4); + + // We compute shapes of outputs in configure, assuming that outputs have + // static shape + // TODO Support dynamic shape + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); +} + +void Sum::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sum::evalFloat() const +{ + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + int num_resolved_axis = 0; + LUCI_INTERPRETER_CHECK( + tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes, + getTensorData<int>(resolved_axes), &num_resolved_axis)); + + float init_value = 0.0; + tflite::reference_ops::ReduceGeneric<float>( + getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<float>(output()), getTensorShape(output()).DimsData(), + output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value, + [](const float current, const float in) -> float { return current + in; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sum.h 
b/compiler/luci-interpreter/src/kernels/Sum.h new file mode 100644 index 000000000..290e0dafa --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sum.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SUM_H +#define LUCI_INTERPRETER_KERNELS_SUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sum : public KernelWithParams<ReducerParams> +{ +public: + // TODO Add temp_sum to support quantized kernels + Sum(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SUM_H diff --git a/compiler/luci-interpreter/src/kernels/Sum.test.cpp b/compiler/luci-interpreter/src/kernels/Sum.test.cpp new file mode 100644 index 000000000..e2dc3012b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sum.test.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class SumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(SumTest, FloatNotKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 0}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{144, 156}; + std::initializer_list<int32_t> ref_output_shape{2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SumTest, FloatKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{84, 100, 116}; + std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SumTest, Input_Output_Type_NEG) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, 
_memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + ReducerParams params{}; + params.keep_dims = true; + + Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SumTest, Invalid_Axes_Type_NEG) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int64_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Sum kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp index b649d5d2f..d47a0bde9 100644 --- a/compiler/luci-interpreter/src/kernels/Tanh.cpp +++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp @@ -17,8 +17,9 @@ #include "kernels/Tanh.h" #include "kernels/Utils.h" +#include <limits> // std::numeric_limits -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/tanh.h> namespace luci_interpreter { @@ -29,7 +30,7 @@ Tanh::Tanh(const Tensor *input, Tensor *output) : 
Kernel({input}, {output}) {} void Tanh::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (input()->element_type() == DataType::U8) { populateLookupTable(); diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp index 392b8672d..bfae479a9 100644 --- a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Tanh.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,29 +28,38 @@ namespace using namespace testing; -TEST(TanhTest, Float) +class TanhTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(TanhTest, Float) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ - 0, -6, 2, 4, // - 3, -2, 10, 1, // + 0, -6, 2, 4, // + 3, -2, 10, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Tanh kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 0, -0.9999877, 0.9640275, 0.999329, // - 0.99505475, -0.9640275, 1, 0.7615941, // + 0, -0.9999877, 0.9640275, 0.999329, // + 0.99505475, -0.9640275, 1, 0.7615941, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(TanhTest, Uint8) +TEST_F(TanhTest, Uint8) { float kMin = -1; float 
kMax = 127.f / 128.f; @@ -57,52 +67,98 @@ TEST(TanhTest, Uint8) std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax); std::vector<float> input_data{ - 0, -6, 2, 4, // - -4, -2, 8, 1, // - 0, -6, 2, 4, // - -4, -2, 8, 1, // - 0, -6, 2, 4, // - -4, -2, 8, 1, // - 0, -6, 2, 4, // - -4, -2, 8, 1, // - 0, -6, 2, 4, // - -4, -2, 8, 1, // - 0, -6, 2, 4, // - -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // }; - Tensor input_tensor{ - DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); Tensor output_tensor = - makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); Tanh kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 
0.76159, // - 0.0, -0.999987, 0.964027, 0.999329, // - -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // }; std::vector<int32_t> ref_output_shape{2, 6, 4, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST_F(TanhTest, InputTypeInvalid_NEG) +{ + std::vector<int64_t> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(TanhTest, InputOutputMismatch_NEG) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor 
input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Tanh kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp index 2c8a6ae78..4d983adda 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.cpp +++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp @@ -17,6 +17,8 @@ #include "kernels/TestUtils.h" +#include <stdexcept> + namespace luci_interpreter { namespace kernels @@ -34,7 +36,72 @@ Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point) return Tensor(element_type, {}, {{scale}, {zero_point}}, ""); } -std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, float max_abs_error) +std::vector<float> dequantizeTensorData(const Tensor &tensor) +{ + if (tensor.element_type() == DataType::U8) + { + std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + if (tensor.element_type() == DataType::S8) + { + std::vector<int8_t> data = extractTensorData<int8_t>(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + else if (tensor.element_type() == DataType::S16) + { + // S16 quantization is symmetric, so zero point should be zero. 
+ for (auto zp : tensor.zero_points()) + { + (void)zp; + assert(zp == 0); + } + + std::vector<int16_t> data = extractTensorData<int16_t>(tensor); + if (tensor.scales().size() == 1) + { + return dequantize(data.data(), data.size(), tensor.scale(), 0); + } + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + const Shape shape = tensor.shape(); + const int32_t quantized_dimension = tensor.quantized_dimension(); + assert(quantized_dimension < shape.num_dims()); + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == tensor.scales().size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size); + + std::vector<float> dequantized_data; + dequantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + float scale = tensor.scales()[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector<float> part_dequantized_data = + dequantize(data.data() + offset, inner_dims_size, scale, 0); + dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(), + part_dequantized_data.end()); + } + return dequantized_data; + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error) { std::vector<Matcher<float>> matchers; matchers.reserve(values.size()); @@ -42,7 +109,7 @@ std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, flo { 
matchers.emplace_back(FloatNear(v, max_abs_error)); } - return matchers; + return ElementsAreArray(matchers); } std::vector<int32_t> extractTensorShape(const Tensor &tensor) diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h index 5311a1949..b9c942e9a 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.h +++ b/compiler/luci-interpreter/src/kernels/TestUtils.h @@ -19,8 +19,10 @@ #define LUCI_INTERPRETER_KERNELS_TESTUTILS_H #include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" #include <type_traits> +#include <limits> // std::numeric_limits #include <gtest/gtest.h> #include <gmock/gmock.h> @@ -32,14 +34,97 @@ namespace kernels namespace testing { +template <typename T> +std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point); + template <DataType DT> -Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data) +Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data, + IMemoryManager *memory_manager) { Tensor tensor(DT, shape, {}, ""); + memory_manager->allocate_memory(tensor); tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type)); return tensor; } +/** + * @brief Create layer-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired tensor shape + * @param scale scale of quantized number + * @param zero_point zero point of quantized number, should be 0 for signed datatypes + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, + const std::vector<float> &data, IMemoryManager *memory_manager) +{ + using NativeT = typename 
DataTypeImpl<DT>::Type; + Tensor tensor(DT, shape, {{scale}, {zero_point}}, ""); + std::vector<NativeT> quantized_data = + quantize<NativeT>(data.data(), data.size(), scale, zero_point); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + +/** + * @brief Create channel-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired tensor shape + * @param scales scales of quantized number + * @param zero_points zero points of quantized number, should be 0 for signed datatypes + * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales, + const std::vector<int32_t> &zero_points, int quantized_dimension, + const std::vector<float> &data, IMemoryManager *memory_manager) +{ + using NativeT = typename DataTypeImpl<DT>::Type; + assert(quantized_dimension < shape.num_dims()); + Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, ""); + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == scales.size()); + assert(quant_dim_size == zero_points.size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * 
inner_dims_size); + + std::vector<NativeT> quantized_data; + quantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + int32_t zero_point = zero_points[channel]; + float scale = scales[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector<NativeT> part_quantized_data = + quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point); + quantized_data.insert(quantized_data.end(), part_quantized_data.begin(), + part_quantized_data.end()); + } + assert(quantized_data.size() == shape.num_elements()); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + Tensor makeOutputTensor(DataType element_type); Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point); @@ -50,12 +135,26 @@ template <typename T> constexpr DataType getElementType() { if (std::is_same<T, float>::value) return DataType::FLOAT32; + if (std::is_same<T, double>::value) + return DataType::FLOAT64; if (std::is_same<T, uint8_t>::value) return DataType::U8; + if (std::is_same<T, uint16_t>::value) + return DataType::U16; + if (std::is_same<T, uint32_t>::value) + return DataType::U32; + if (std::is_same<T, uint64_t>::value) + return DataType::U64; + if (std::is_same<T, int8_t>::value) + return DataType::S8; + if (std::is_same<T, int16_t>::value) + return DataType::S16; if (std::is_same<T, int32_t>::value) return DataType::S32; if (std::is_same<T, int64_t>::value) return DataType::S64; + if (std::is_same<T, bool>::value) + return DataType::BOOL; return DataType::Unknown; } @@ -65,47 +164,62 @@ template <typename T> std::vector<T> extractTensorData(const Tensor &tensor) return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements()); } -std::vector<::testing::Matcher<float>> ArrayFloatNear(const 
std::vector<float> &values, +std::vector<float> dequantizeTensorData(const Tensor &tensor); + +// Array version of `::testing::FloatNear` matcher. +::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error = 1.0e-5f); template <typename T> -inline std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point) +std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point) { - assert(!std::is_floating_point<T>::value); + static_assert(std::is_integral<T>::value, "Integral type expected."); + + float q_min{}, q_max{}; + if (std::is_signed<T>::value) + { + q_min = -std::numeric_limits<T>::max(); + q_max = std::numeric_limits<T>::max(); + } + else + { + q_min = 0; + q_max = std::numeric_limits<T>::max(); + } + std::vector<T> q; - for (const auto &f : data) + for (size_t i = 0; i < num_elements; ++i) { - q.push_back(static_cast<T>(std::max<float>( - std::numeric_limits<T>::lowest(), - std::min<float>(std::numeric_limits<T>::max(), std::round(zero_point + (f / scale)))))); + const auto &f = data[i]; + q.push_back(static_cast<T>( + std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale)))))); } return q; } template <typename T> -inline std::vector<float> dequantize(const std::vector<T> &data, float scale, int32_t zero_point) +std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point) { - assert(!std::is_floating_point<T>::value); + static_assert(std::is_integral<T>::value, "Integral type expected."); std::vector<float> f; - for (const T &q : data) + for (size_t i = 0; i < num_elements; ++i) { + const T &q = data[i]; f.push_back(scale * (q - zero_point)); } return f; } +// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned). 
template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max) { - if (std::is_floating_point<T>::value) - { - return {1.0f, 0}; - } + static_assert(std::is_integral<T>::value, "Integral type expected."); int32_t zero_point = 0; - double scale = 0; + float scale = 0; const T qmin = std::numeric_limits<T>::lowest(); const T qmax = std::numeric_limits<T>::max(); - const double qmin_double = qmin; - const double qmax_double = qmax; + const float qmin_double = qmin; + const float qmax_double = qmax; // 0 should always be a representable value. Let's assume that the initial // min,max range contains 0. assert(f_max >= 0); @@ -131,16 +245,16 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, // The arithmetic error on the zero point computed from either pair // will be roughly machine_epsilon * (sum of absolute values of terms) // so we want to use the variant that adds the smaller terms. - const double zero_point_from_min = qmin_double - f_min / scale; - const double zero_point_from_max = qmax_double - f_max / scale; + const float zero_point_from_min = qmin_double - f_min / scale; + const float zero_point_from_max = qmax_double - f_max / scale; - const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale); + const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale); - const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale); + const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale); - const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; + const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error + ? 
zero_point_from_min + : zero_point_from_max; // Now we need to nudge the zero point to be an integer // (our zero points are integer, and this is motivated by the requirement @@ -168,7 +282,7 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, assert(qmin <= nudged_zero_point); zero_point = nudged_zero_point; // finally, return the values - return {static_cast<float>(scale), zero_point}; + return {scale, zero_point}; } inline float getTolerance(float min, float max, int quantize_steps) diff --git a/compiler/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-interpreter/src/kernels/Transpose.cpp index 8265d9937..802d87295 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/transpose.h> #include <stdexcept> @@ -29,7 +29,7 @@ namespace kernels { Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output) - : Kernel({input, perm}, {output}) + : Kernel({input, perm}, {output}) { } @@ -37,7 +37,7 @@ void Transpose::configure() { // Transpose op only supports 1D-4D input arrays. 
int dims = input()->shape().num_dims(); - const int *perm_data = getTensorData<int32_t>(perm()); + const int32_t *perm_data = getTensorData<int32_t>(perm()); assert(input()->shape().num_dims() <= 4); assert(input()->element_type() == output()->element_type()); @@ -58,8 +58,8 @@ void Transpose::configure() void Transpose::execute() const { tflite::TransposeParams params{}; - const int *perm_data = getTensorData<int32_t>(perm()); - const int size = perm()->shape().dim(0); + const int32_t *perm_data = getTensorData<int32_t>(perm()); + const int32_t size = perm()->shape().dim(0); params.perm_count = size; for (int i = 0; i < size; i++) params.perm[i] = perm_data[i]; diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp index 87e6e2a00..43be8f8b9 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Transpose.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,18 +30,18 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, - std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data, - DataType element_type) + std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); - - Tensor perm_tensor{DataType::S32, perm_shape, {}, ""}; - perm_tensor.writeData(perm_data.begin(), perm_data.size() * sizeof(int32_t)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor 
= + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); Transpose kernel(&input_tensor, &perm_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -51,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(TransposeTest, DataTypes); +TYPED_TEST_SUITE(TransposeTest, DataTypes); TYPED_TEST(TransposeTest, Small3D) { @@ -60,56 +61,53 @@ TYPED_TEST(TransposeTest, Small3D) 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, /*perm_data=*/{2, 0, 1}, /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, - 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}, - getElementType<TypeParam>()); + 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}); } TYPED_TEST(TransposeTest, Large4D) { Check<TypeParam>( - /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5}, - /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, - /*perm_data=*/{2, 0, 1, 3}, - /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, - 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, - 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49, - 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 
106, 107, 108, 109, - 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, - 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, - 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, - 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}, - getElementType<TypeParam>()); + /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{2, 0, 1, 3}, + /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, + 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, + 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49, + 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109, + 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, + 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, + 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, + 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}); } TYPED_TEST(TransposeTest, Large2D) { Check<TypeParam>( - /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10}, - /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 
94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, - /*perm_data=*/{1, 0}, - /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, - 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, - 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52, - 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113, - 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, - 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, - 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58, - 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}, - getElementType<TypeParam>()); + /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{1, 0}, + /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, + 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, + 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52, + 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113, + 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, + 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, + 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58, + 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp 
index 898bae3da..08bfbf319 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -19,9 +19,10 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h> #include <stdexcept> +#include <limits> // std::numeric_limits namespace luci_interpreter { @@ -30,31 +31,27 @@ namespace kernels { TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - const Tensor *bias, Tensor *output, const TransposeConvParams ¶ms) - : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params) + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams ¶ms) + : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, + {output, scratch_tensor}, params) { } +TransposeConv::~TransposeConv() +{ + // Define destructor here, to delete vector of qunatized multipliers properly +} + void TransposeConv::configure() { assert(output_shape()->shape().num_dims() == 1); assert(input()->shape().num_dims() == 4); assert(filter()->shape().num_dims() == 4); - assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8); + assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 || + input()->element_type() == DataType::S16); assert(input()->element_type() == output()->element_type()); assert(input()->shape().dim(3) == filter()->shape().dim(3)); - if (input()->element_type() == DataType::U8) - { - _scratch_tensor = - std::make_unique<Tensor>(DataType::S32, output()->shape(), AffineQuantization{}, ""); - double real_multiplier = 0.0; - const double input_product_scale = input()->scale() * filter()->scale(); - assert(input_product_scale >= 0); - real_multiplier = input_product_scale / output()->scale(); - int exponent; - 
quantizeMultiplier(real_multiplier, &_output_multiplier, &exponent); - _output_shift = -exponent; - } const int num_dims = output_shape()->shape().dim(0); Shape out_shape(num_dims); @@ -62,6 +59,36 @@ void TransposeConv::configure() for (int i = 0; i < num_dims; i++) out_shape.dim(i) = shape_data[i]; output()->resize(out_shape); + + const int32_t filter_height = filter()->shape().dim(1); + const int32_t filter_width = filter()->shape().dim(2); + const int32_t output_height = out_shape.dim(1); + const int32_t output_width = out_shape.dim(2); + + const int32_t unused_output_height = + computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1); + const int32_t unused_output_width = + computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1); + + _padding_height = + computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height); + _padding_width = + computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width); + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->resize(output()->shape()); + const std::vector<double> real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + _quant_multipliers = quantizeMultipliers(real_multipliers); + } + else + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->set_allocatable(false); + } } void TransposeConv::execute() const @@ -72,7 +99,20 @@ void TransposeConv::execute() const evalFloat(); break; case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + 
evalQuantizedPerChannel(); + } + break; + case DataType::S16: + evalQuantizedS16(); break; default: throw std::runtime_error("Unsupported type."); @@ -81,74 +121,231 @@ void TransposeConv::execute() const void TransposeConv::evalFloat() const { - const int width = output()->shape().dim(2); - const int height = output()->shape().dim(1); - - const int filter_width = filter()->shape().dim(2); - const int filter_height = filter()->shape().dim(1); - - int unused_output_height, unused_output_width; - unused_output_width = - computeOutputSize(params().padding, width, filter_width, params().stride_width, 1); - unused_output_height = - computeOutputSize(params().padding, height, filter_height, params().stride_height, 1); - int32_t offset = 0; tflite::ConvParams op_params{}; op_params.padding_type = tflite::PaddingType::kSame; - op_params.padding_values.height = computePaddingWithOffset( - params().stride_height, 1, height, filter_height, unused_output_height, &offset); - op_params.padding_values.height_offset = offset; - op_params.padding_values.width = computePaddingWithOffset( - params().stride_width, 1, width, filter_width, unused_output_width, &offset); - op_params.padding_values.width_offset = offset; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; op_params.stride_height = params().stride_height; op_params.stride_width = params().stride_width; - op_params.output_multiplier = _output_multiplier; - tflite::reference_ops::TransposeConv( - op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), - (float *)nullptr); + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<float>(input()), // + getTensorShape(filter()), getTensorData<float>(filter()), // + 
getTensorShape(bias()), getTensorData<float>(bias()), // + getTensorShape(output()), getTensorData<float>(output()), // + tflite::RuntimeShape(), nullptr); } void TransposeConv::evalQuantized() const { - int32_t input_offset = -input()->zero_point(); - int32_t filter_offset = -filter()->zero_point(); - int32_t output_offset = filter()->zero_point(); - const int width = output()->shape().dim(2); - const int height = output()->shape().dim(1); - - const int filter_width = filter()->shape().dim(2); - const int filter_height = filter()->shape().dim(1); - - int unused_output_height, unused_output_width; - unused_output_width = - computeOutputSize(params().padding, width, filter_width, params().stride_width, 1); - unused_output_height = - computeOutputSize(params().padding, height, filter_height, params().stride_height, 1); - int32_t offset = 0; tflite::ConvParams op_params{}; op_params.padding_type = tflite::PaddingType::kSame; - op_params.padding_values.height = computePaddingWithOffset( - params().stride_height, 1, height, filter_height, unused_output_height, &offset); - op_params.padding_values.width = computePaddingWithOffset( - params().stride_width, 1, width, filter_width, unused_output_width, &offset); + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; op_params.stride_height = params().stride_height; op_params.stride_width = params().stride_width; - op_params.input_offset = input_offset; - op_params.output_offset = output_offset; - op_params.weights_offset = filter_offset; - op_params.output_multiplier = _output_multiplier; - op_params.output_shift = -_output_shift; + // The kernel expects input and filter zero points to be negated. + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.weights_offset = -filter()->zero_point(); // Note the '-'. 
+ op_params.output_offset = output()->zero_point(); + op_params.output_multiplier = _quant_multipliers[0].multiplier; + op_params.output_shift = _quant_multipliers[0].shift; op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min(); op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max(); - tflite::reference_ops::TransposeConv( - op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()), - getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), - getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(), - (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get())); + auto scratch_tensor = getOutputTensors()[1]; + + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<uint8>(input()), // + getTensorShape(filter()), getTensorData<uint8>(filter()), // + getTensorShape(bias()), getTensorData<int32_t>(bias()), // + getTensorShape(output()), getTensorData<uint8>(output()), // + tflite::RuntimeShape(), nullptr, // + getTensorData<int32_t>(scratch_tensor)); +} + +void TransposeConv::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int32_t>(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const 
int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t)); + + BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = 
scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void TransposeConv::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int64_t>(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t)); + + BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); + for (int32_t batch = 0; 
batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index 3a0eae761..cea0cf3c7 100644 --- 
a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -25,11 +25,16 @@ namespace luci_interpreter namespace kernels { +class ChannelQuantMultipliers; + class TransposeConv : public KernelWithParams<TransposeConvParams> { public: TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - const Tensor *bias, Tensor *output, const TransposeConvParams ¶ms); + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams ¶ms); + + ~TransposeConv(); const Tensor *output_shape() const { return _inputs[0]; } const Tensor *filter() const { return _inputs[1]; } @@ -43,14 +48,15 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _scratch_tensor; - + int32_t _padding_height{}; + int32_t _padding_width{}; // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. 
- int32_t _output_multiplier = 0; - int _output_shift = 0; + std::vector<ChannelQuantMultipliers> _quant_multipliers; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index 0fbe9328b..8e9cfc6ad 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -16,6 +16,7 @@ #include "kernels/TransposeConv.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -28,41 +29,51 @@ using namespace testing; template <typename T, typename B> void Check(std::initializer_list<int32_t> output_shape_shape, - std::initializer_list<int32_t> weight_shape, - std::initializer_list<int32_t> input_data_shape, + std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data, - std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data, + std::initializer_list<T> input_data, std::initializer_list<B> bias_data, std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height, - int32_t stride_width, DataType element_type) + int32_t stride_width) { - Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""}; - output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T)); - Tensor weight_tensor{element_type, weight_shape, {}, ""}; - weight_tensor.writeData(weight_data.begin(), weight_data.size() * sizeof(T)); - Tensor input_data_tensor{element_type, input_data_shape, {}, ""}; - input_data_tensor.writeData(input_data_data.begin(), input_data_data.size() * sizeof(T)); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + 
constexpr DataType element_type = getElementType<T>(); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get()); + Tensor weight_tensor = + makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get()); + Tensor input_data_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + + DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(element_type); TransposeConvParams params{}; params.padding = padding; params.stride_height = stride_height; params.stride_width = stride_width; + params.activation = luci::FusedActFunc::NONE; if (bias_data.size() != 0) { - Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data); + Tensor bias_tensor = + makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get()); TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); } else { TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); } EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -71,14 +82,13 @@ void Check(std::initializer_list<int32_t> output_shape_shape, TEST(TransposeConvTest, FloatSimple) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, - /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, 
/*outputShape_data=*/{1, 4, 4, 1}, - /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, - /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - /*bias_data=*/{}, - /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, - /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, - getElementType<float>()); + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*bias_data=*/{}, + /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); SUCCEED(); } @@ -86,16 +96,15 @@ TEST(TransposeConvTest, FloatSimple) TEST(TransposeConvTest, FloatTwoFiltersTest) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, - /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, - /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, - /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, - /*bias_data=*/{}, - /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, - 3352, 3652, 2760}, - /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, - getElementType<float>()); + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + /*bias_data=*/{}, + /*output_data=*/ + {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); SUCCEED(); } @@ -103,28 +112,246 @@ TEST(TransposeConvTest, FloatTwoFiltersTest) TEST(TransposeConvTest, SimpleBiasTest) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, - /*input_shape=*/{1, 2, 2, 1}, - /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 5, 5, 2}, - /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, - /*input_data=*/{1, 2, 3, 4}, - /*bias_data=*/{3, 4}, - /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, - 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, - 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, - /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2, - getElementType<float>()); + /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, + /*input_shape=*/{1, 2, 2, 1}, + /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2}, + /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + /*input_data=*/{1, 2, 3, 4}, + /*bias_data=*/{3, 4}, + /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, + 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, + 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, + /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2); SUCCEED(); } -// TODO Uint8Simple -// Implement GetDequantizedOutput Function. 
-// Create Test for Uint8 Case +TEST(TransposeConvTest, UInt8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, + 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.activation = luci::FusedActFunc::NONE; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, UInt8_CWQ) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + const int32_t output_channels = 2; + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. 
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17)); + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.activation = luci::FusedActFunc::NONE; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); -// TODO Uint8FiltersTest -// Implement GetDequantizedOutput Function. -// Create Test for Uint8 Case + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + DataType 
scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.activation = luci::FusedActFunc::NONE; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16_CWQ_weights) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + const int output_channels = 2; + const Shape input_shape{1, 2, 2, 1}; + const Shape filter_shape{output_channels, 3, 3, 1}; + const Shape bias_shape{output_channels}; + std::vector<int32_t> output_shape_data{1, 5, 5, output_channels}; + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + const float input_scale = 0.25; + const float output_scale = 0.5; + const std::vector<float> filter_scales{0.2f, 0.5f}; + std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale}; + const std::vector<int32_t> zerop(2, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, 
memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.activation = luci::FusedActFunc::NONE; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp new file mode 100644 index 000000000..f049beec4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp @@ -0,0 +1,892 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/UnidirectionalSequenceLSTM.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/tensor_utils.h> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace lstm +{ +namespace +{ + +using namespace tflite; + +void UpdateLstmCellFloat(int n_batch, int n_cell, float *cell_state, const float *input_gate, + float *forget_gate, const float *cell_gate, bool use_cifg, float clip) +{ +// NOTE tflite source is as is but will fail build with gcc-8 and above +// TODO remove #pragma +#pragma GCC diagnostic ignored "-Wrestrict" + tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, n_batch * n_cell, cell_state); + + if (use_cifg) + { + // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as + // scratch, as input_gate array is not allocated in this case. (Be careful + // not to write to the scratch before reading the forget gate data.) 
+ float *scratch = forget_gate; + tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch); + tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, scratch, n_batch * n_cell, + cell_state); + } + else + { + tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, input_gate, n_batch * n_cell, + cell_state); + } + if (clip > 0.0f) + { + tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip); + } +} + +void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float *cell_state, + const float *output_gate, TfLiteFusedActivation activation, + const float *projection_weights, const float *projection_bias, + const float proj_clip, float *output_state, float *scratch) +{ + tensor_utils::ApplyActivationToVector(cell_state, n_batch * n_cell, activation, scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, n_batch * n_cell, scratch); + + const bool use_projection = (projection_weights != nullptr); + const bool use_projection_bias = (projection_bias != nullptr); + + if (use_projection) + { + if (use_projection_bias) + { + tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, n_batch, output_state); + } + else + { + std::fill_n(output_state, n_batch * n_output, 0.0f); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate(projection_weights, n_output, n_cell, scratch, + n_batch, output_state); + if (proj_clip > 0.0f) + { + tensor_utils::CwiseClipping(output_state, n_batch * n_output, proj_clip); + } + } + else + { + std::copy_n(scratch, n_batch * n_output, output_state); + } +} + +inline void CalculateLstmGateFloat(const float *input, const float *input_to_gate_weights, + const float *aux_input, const float *aux_input_to_gate_weights, + const float *output_state, + const float *recurrent_to_gate_weights, const float *cell_state, + const float *cell_to_gate_weights, + const float *layer_norm_coefficients, const float *gate_bias, + const int n_batch, const int n_input, const int n_aux_input, + const int 
n_output, const int n_cell, + const TfLiteFusedActivation activation, float *gate, + const bool is_input_all_zeros, const bool is_aux_input_all_zeros) +{ + const bool use_peephole = (cell_to_gate_weights != nullptr); + const bool use_layer_norm = (layer_norm_coefficients != nullptr); + + // Initialize scratch buffers with bias for regular lstm or initialize with + // zero for layer norm lstm. + if (use_layer_norm) + { + std::fill_n(gate, n_cell * n_batch, 0.0f); + } + else + { + tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, gate); + } + // For each batch and cell: compute input_weight * input. + // Skip if input is all zeros. + if (!is_input_all_zeros) + { + tensor_utils::MatrixBatchVectorMultiplyAccumulate(input_to_gate_weights, n_cell, n_input, input, + n_batch, gate); + } + // For each batch and cell: compute aux_input_weight * aux_input. + // Skip if auxiliary input is not available or all zeros. + if (!is_aux_input_all_zeros) + { + tensor_utils::MatrixBatchVectorMultiplyAccumulate(aux_input_to_gate_weights, n_cell, + n_aux_input, aux_input, n_batch, gate); + } + // For each batch and cell: compute recurrent_weight * output_state. 
+ tensor_utils::MatrixBatchVectorMultiplyAccumulate(recurrent_to_gate_weights, n_cell, n_output, + output_state, n_batch, gate); + // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM) + if (use_peephole) + { + tensor_utils::VectorBatchVectorCwiseProductAccumulate(cell_to_gate_weights, n_cell, cell_state, + n_batch, gate); + } + // Do layer normalization (if layer norm LSTM) + if (use_layer_norm) + { + tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch); + tensor_utils::VectorBatchVectorCwiseProduct(layer_norm_coefficients, n_cell, gate, n_batch, + gate); + tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, gate); + } + // Apply activation + tensor_utils::ApplyActivationToVector(gate, n_batch * n_cell, activation, gate); +} + +inline void LstmStepFloat( + const float *input_ptr, const float *input_to_input_weights_ptr, + const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr, + const float *input_to_output_weights_ptr, const float *aux_input_ptr, + const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr, + const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr, + const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr, + const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr, + const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr, + const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr, + const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr, + const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr, + const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr, + const float *output_gate_bias_ptr, const float *projection_weights_ptr, + const float *projection_bias_ptr, const TfLiteLSTMParams *params, int n_batch, int 
n_cell, + int n_input, int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr, + float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3, + float *output_ptr) +{ + // Since we have already checked that weights are all there or none, we can + // check the existence of only one to the get the condition. + const bool use_cifg = (input_to_input_weights_ptr == nullptr); + + // Make named scratch buffers. + float *input_gate_scratch = scratch0; + float *forget_gate_scratch = scratch1; + float *cell_gate_scratch = scratch2; + float *output_gate_scratch = scratch3; + + // Check if inputs are all zeros so we can skip some computations. + const bool is_input_all_zeros = tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); + const bool is_aux_input_all_zeros = + (aux_input_ptr == nullptr || tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)); + if (!use_cifg) + { + // Calculate the input gate. (If not CIFG.) + CalculateLstmGateFloat(input_ptr, input_to_input_weights_ptr, aux_input_ptr, + aux_input_to_input_weights_ptr, output_state_ptr, + recurrent_to_input_weights_ptr, cell_state_ptr, + cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr, + input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, + /*activation=*/kTfLiteActSigmoid, input_gate_scratch, is_input_all_zeros, + is_aux_input_all_zeros); + } + // Calculate the forget gate. + CalculateLstmGateFloat(input_ptr, input_to_forget_weights_ptr, aux_input_ptr, + aux_input_to_forget_weights_ptr, output_state_ptr, + recurrent_to_forget_weights_ptr, cell_state_ptr, + cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr, + forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, + /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros, + is_aux_input_all_zeros); + // Calculate the cell update gate. 
+ CalculateLstmGateFloat( + input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr, + output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, + /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch, + n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch, + is_input_all_zeros, is_aux_input_all_zeros); + // Update the cell state. + UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch, + cell_gate_scratch, use_cifg, params->cell_clip); + // Calculate output gate. + CalculateLstmGateFloat(input_ptr, input_to_output_weights_ptr, aux_input_ptr, + aux_input_to_output_weights_ptr, output_state_ptr, + recurrent_to_output_weights_ptr, cell_state_ptr, + cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr, + output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell, + /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros, + is_aux_input_all_zeros); + // Update the output state. + CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch, + params->activation, projection_weights_ptr, projection_bias_ptr, + params->proj_clip, output_state_ptr, scratch2); + // Copy output state to the output. Note that the output's rows may not be + // contiguous (output_batch_leading_dim != n_output). 
+ for (int b = 0; b < n_batch; b++) + { + std::copy_n(output_state_ptr + b * n_output, n_output, + output_ptr + b * output_batch_leading_dim); + } +} + +} // namespace + +void EvalFloat(const Tensor *input, + + const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights, + const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, + + const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, + const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, + + const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, + const Tensor *cell_to_output_weights, + + const Tensor *input_layer_norm_coefficients, + const Tensor *forget_layer_norm_coefficients, + const Tensor *cell_layer_norm_coefficients, + const Tensor *output_layer_norm_coefficients, + + const Tensor *aux_input, const Tensor *aux_input_to_input_weights, + const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights, + const Tensor *aux_input_to_output_weights, + + const Tensor *input_gate_bias, const Tensor *forget_gate_bias, + const Tensor *cell_gate_bias, const Tensor *output_gate_bias, + + const Tensor *projection_weights, const Tensor *projection_bias, + const TfLiteLSTMParams *params, + + bool forward_sequence, bool time_major, int output_offset, + + Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output) +{ + const Shape &input_shape = input->shape(); + assert(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3); + int max_time, n_batch; + if (input_shape.num_dims() == 3) + { + max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1); + n_batch = (time_major) ? 
input_shape.dim(1) : input_shape.dim(0); + } + else + { + max_time = 1; + n_batch = input_shape.dim(0); + } + const int n_input = input_shape.dim(input_shape.num_dims() - 1); + + int aux_input_temp = 0; + if (aux_input) + { + const Shape &aux_input_shape = aux_input->shape(); + aux_input_temp = aux_input_shape.dim(aux_input_shape.num_dims() - 1); + } + const int aux_input_size = aux_input_temp; + + // n_cell and n_output will be the same size when there is no projection. + const Shape &input_to_output_weights_shape = input_to_output_weights->shape(); + const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights->shape(); + const int n_cell = input_to_output_weights_shape.dim(0); + const int n_output = recurrent_to_output_weights_shape.dim(1); + + // Since we have already checked that weights are all there or none, we can + // check the existence of only one to the get the condition. + const bool use_cifg = (input_to_input_weights == nullptr); + + // Index the scratch buffers pointers to the global scratch buffer. + float *scratch_buffer_ptr = getTensorData<float>(scratch_buffer); + float *input_gate_scratch = nullptr; + float *cell_gate_scratch = nullptr; + float *forget_gate_scratch = nullptr; + float *output_gate_scratch = nullptr; + if (use_cifg) + { + cell_gate_scratch = scratch_buffer_ptr; + forget_gate_scratch = scratch_buffer_ptr + n_cell * n_batch; + output_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch; + } + else + { + input_gate_scratch = scratch_buffer_ptr; + cell_gate_scratch = scratch_buffer_ptr + n_cell * n_batch; + forget_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch; + output_gate_scratch = scratch_buffer_ptr + 3 * n_cell * n_batch; + } + + const Shape &output_shape = output->shape(); + const int output_batch_leading_dim = output_shape.dim(output_shape.num_dims() - 1); + if (time_major) + { + // Loop through the sequence. 
+ const int input_step = n_batch * n_input; + const int output_step = n_batch * output_batch_leading_dim; + for (int t = 0; t < max_time; t++) + { + // If this is the forward_sequence, step forward, otherwise step + // backwards. + const int t_rel = forward_sequence ? t : max_time - t - 1; + const float *input_ptr = getTensorData<float>(input) + t_rel * input_step; + const float *aux_input_ptr = nullptr; + if (aux_input) + { + aux_input_ptr = getTensorData<float>(aux_input) + t_rel * input_step; + } + float *output_ptr = getTensorData<float>(output) + t_rel * output_step + output_offset; + + LstmStepFloat( + input_ptr, getTensorData<float>(input_to_input_weights), + getTensorData<float>(input_to_forget_weights), getTensorData<float>(input_to_cell_weights), + getTensorData<float>(input_to_output_weights), aux_input_ptr, + getTensorData<float>(aux_input_to_input_weights), + getTensorData<float>(aux_input_to_forget_weights), + getTensorData<float>(aux_input_to_cell_weights), + getTensorData<float>(aux_input_to_output_weights), + getTensorData<float>(recurrent_to_input_weights), + getTensorData<float>(recurrent_to_forget_weights), + getTensorData<float>(recurrent_to_cell_weights), + getTensorData<float>(recurrent_to_output_weights), + getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights), + getTensorData<float>(cell_to_output_weights), + getTensorData<float>(input_layer_norm_coefficients), + getTensorData<float>(forget_layer_norm_coefficients), + getTensorData<float>(cell_layer_norm_coefficients), + getTensorData<float>(output_layer_norm_coefficients), getTensorData<float>(input_gate_bias), + getTensorData<float>(forget_gate_bias), getTensorData<float>(cell_gate_bias), + getTensorData<float>(output_gate_bias), getTensorData<float>(projection_weights), + getTensorData<float>(projection_bias), params, n_batch, n_cell, n_input, aux_input_size, + n_output, output_batch_leading_dim, getTensorData<float>(output_state), + 
getTensorData<float>(cell_state), input_gate_scratch, forget_gate_scratch, + cell_gate_scratch, output_gate_scratch, output_ptr); + } + } + else + { + for (int b = 0; b < n_batch; b++) + { + const int input_step = n_input; + const int output_step = output_batch_leading_dim; + for (int t = 0; t < max_time; t++) + { + // If this is the forward_sequence, step forward, otherwise step + // backwards. + const int t_rel = forward_sequence ? t : max_time - t - 1; + const int time_offset = b * max_time + t_rel; + const float *input_ptr = getTensorData<float>(input) + time_offset * input_step; + const float *aux_input_ptr = nullptr; + if (aux_input) + { + aux_input_ptr = getTensorData<float>(aux_input) + time_offset * input_step; + } + float *output_ptr = + getTensorData<float>(output) + time_offset * output_step + output_offset; + + // Offset the {output,cell}_state pointers to the right batch. + float *output_state_ptr = getTensorData<float>(output_state) + b * output_batch_leading_dim; + float *cell_state_ptr = getTensorData<float>(cell_state) + b * n_cell; + // Offset the scratch pointers to the right batch. + float *input_gate_scratch_ptr = + input_gate_scratch ? 
input_gate_scratch + b * n_cell : nullptr; + float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell; + float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell; + float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell; + + LstmStepFloat( + input_ptr, getTensorData<float>(input_to_input_weights), + getTensorData<float>(input_to_forget_weights), + getTensorData<float>(input_to_cell_weights), + getTensorData<float>(input_to_output_weights), aux_input_ptr, + getTensorData<float>(aux_input_to_input_weights), + getTensorData<float>(aux_input_to_forget_weights), + getTensorData<float>(aux_input_to_cell_weights), + getTensorData<float>(aux_input_to_output_weights), + getTensorData<float>(recurrent_to_input_weights), + getTensorData<float>(recurrent_to_forget_weights), + getTensorData<float>(recurrent_to_cell_weights), + getTensorData<float>(recurrent_to_output_weights), + getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights), + getTensorData<float>(cell_to_output_weights), + getTensorData<float>(input_layer_norm_coefficients), + getTensorData<float>(forget_layer_norm_coefficients), + getTensorData<float>(cell_layer_norm_coefficients), + getTensorData<float>(output_layer_norm_coefficients), + getTensorData<float>(input_gate_bias), getTensorData<float>(forget_gate_bias), + getTensorData<float>(cell_gate_bias), getTensorData<float>(output_gate_bias), + getTensorData<float>(projection_weights), getTensorData<float>(projection_bias), params, + /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim, + output_state_ptr, cell_state_ptr, input_gate_scratch_ptr, forget_gate_scratch_ptr, + cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr); + } + } + } +} + +} // namespace lstm +} // namespace kernels +} // namespace luci_interpreter + +namespace luci_interpreter +{ +namespace kernels +{ + +UnidirectionalSequenceLSTM::UnidirectionalSequenceLSTM( + const Tensor *input, + + const Tensor 
*input_to_input_weights, const Tensor *input_to_forget_weights, + const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, + + const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, + const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, + + const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, + const Tensor *cell_to_output_weights, + + const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias, + const Tensor *output_gate_bias, + + const Tensor *projection_weights, const Tensor *projection_bias, + + const Tensor *output_state, const Tensor *cell_state, const Tensor *input_layer_norm_coefficients, + const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients, + const Tensor *output_layer_norm_coefficients, + + Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3, + const UnidirectionalSequenceLSTMParams ¶ms) + : KernelWithParams<UnidirectionalSequenceLSTMParams>( + {input, + input_to_input_weights, + input_to_forget_weights, + input_to_cell_weights, + input_to_output_weights, + + recurrent_to_input_weights, + recurrent_to_forget_weights, + recurrent_to_cell_weights, + recurrent_to_output_weights, + + cell_to_input_weights, + cell_to_forget_weights, + cell_to_output_weights, + + input_gate_bias, + forget_gate_bias, + cell_gate_bias, + output_gate_bias, + + projection_weights, + projection_bias, + + output_state, + cell_state, + + input_layer_norm_coefficients, + forget_layer_norm_coefficients, + cell_layer_norm_coefficients, + output_layer_norm_coefficients}, + {output, scratchpad_1, scratchpad_2, scratchpad_3}, params) +{ + // Do nothing +} + +// Check that input tensor dimensions matches with each other. 
+void UnidirectionalSequenceLSTM::check_input_tensor_dimensions(int n_input, int n_output, + int n_cell, bool use_layer_norm, + bool is_integer) +{ + // Making sure clipping parameters have valid values. + // == 0 means no clipping + // > 0 means clipping + LUCI_INTERPRETER_CHECK(params().cell_clip >= 0); + LUCI_INTERPRETER_CHECK(params().proj_clip >= 0); + + if (input_to_input_weights() != nullptr) + { + const Shape &input_to_input_weights_shape = input_to_input_weights()->shape(); + LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(1) == n_input); + } + + const Shape &input_to_forget_weights_shape = input_to_forget_weights()->shape(); + LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(1) == n_input); + + const Shape &input_to_cell_weights_shape = input_to_cell_weights()->shape(); + LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(1) == n_input); + + if (recurrent_to_input_weights() != nullptr) + { + const Shape &recurrent_to_input_weights_shape = recurrent_to_input_weights()->shape(); + LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(1) == n_output); + } + + const Shape &recurrent_to_forget_weights_shape = recurrent_to_forget_weights()->shape(); + LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(0) == n_cell); + 
LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(1) == n_output); + + const Shape &recurrent_to_cell_weights_shape = recurrent_to_cell_weights()->shape(); + LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(1) == n_output); + + // We make sure the input-gate's parameters are either both present (regular + // LSTM) or not at all (CIFG-LSTM). + const bool cifg_weights_all_or_none = + ((input_to_input_weights() != nullptr) && (recurrent_to_input_weights() != nullptr)) || + ((input_to_input_weights() == nullptr) && (recurrent_to_input_weights() == nullptr)); + LUCI_INTERPRETER_CHECK(cifg_weights_all_or_none == true); + + if (cell_to_input_weights() != nullptr) + { + const Shape &cell_to_input_weights_shape = cell_to_input_weights()->shape(); + LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(is_integer + ? cell_to_input_weights()->element_type() == loco::DataType::S16 + : cell_to_input_weights()->element_type() == + input_to_forget_weights()->element_type()); + } + + if (cell_to_forget_weights() != nullptr) + { + const Shape &cell_to_forget_weights_shape = cell_to_forget_weights()->shape(); + LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(is_integer + ? 
cell_to_forget_weights()->element_type() == loco::DataType::S16 + : cell_to_forget_weights()->element_type() == + input_to_forget_weights()->element_type()); + } + + if (cell_to_output_weights() != nullptr) + { + const Shape &cell_to_output_weights_shape = cell_to_output_weights()->shape(); + LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.dim(0) == n_cell); + LUCI_INTERPRETER_CHECK(is_integer + ? cell_to_output_weights()->element_type() == loco::DataType::S16 + : cell_to_output_weights()->element_type() == + input_to_forget_weights()->element_type()); + } + + // Making sure the peephole weights are there all or none. + const bool use_cifg = (input_to_input_weights() == nullptr); + const bool peephole_weights_all_or_none = + ((cell_to_input_weights() != nullptr || use_cifg) && (cell_to_forget_weights() != nullptr) && + (cell_to_output_weights() != nullptr)) || + ((cell_to_input_weights() == nullptr) && (cell_to_forget_weights() == nullptr) && + (cell_to_output_weights() == nullptr)); + LUCI_INTERPRETER_CHECK(peephole_weights_all_or_none == true); + + // Make sure the input gate bias is present only when not a CIFG-LSTM. 
+ if (use_cifg) + { + LUCI_INTERPRETER_CHECK(input_gate_bias() == nullptr); + } + else + { + const Shape &input_gate_bias_shape = input_gate_bias()->shape(); + LUCI_INTERPRETER_CHECK(input_gate_bias_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(input_gate_bias_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::S32); + } + else + { + LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::FLOAT32); + } + } + + const Shape &forget_gate_bias_shape = forget_gate_bias()->shape(); + LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::S32); + } + else + { + LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::FLOAT32); + } + + const Shape &cell_gate_bias_shape = cell_gate_bias()->shape(); + LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::S32); + } + else + { + LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::FLOAT32); + } + + const Shape &output_gate_bias_shape = output_gate_bias()->shape(); + LUCI_INTERPRETER_CHECK(output_gate_bias_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(output_gate_bias_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::S32); + } + else + { + LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::FLOAT32); + } + + if (projection_weights() != nullptr) + { + const Shape &projection_weights_shape = projection_weights()->shape(); + LUCI_INTERPRETER_CHECK(projection_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(0) == 
n_output); + LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(1) == n_cell); + } + + if (projection_bias() != nullptr) + { + const Shape &projection_bias_shape = projection_bias()->shape(); + LUCI_INTERPRETER_CHECK(projection_bias_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(projection_bias_shape.dim(0) == n_output); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::S32); + } + else + { + LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::FLOAT32); + } + } + + // Making sure the projection tensors are consistent: + // 1) If projection weight is not present, then projection bias should not be + // present. + // 2) If projection weight is present, then projection bias is optional. + // TODO(ghodrat): make sure this is correct. + const bool projecton_tensors_consistent = + ((projection_weights() != nullptr) || (projection_bias() == nullptr)); + LUCI_INTERPRETER_CHECK(projecton_tensors_consistent == true); + + if (use_layer_norm) + { + if (use_cifg) + { + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() == nullptr); + } + else + { + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() != nullptr) + + const Shape &input_layer_norm_coefficients_shape = input_layer_norm_coefficients()->shape(); + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() == + loco::DataType::S16); + } + else + { + LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() == + loco::DataType::FLOAT32); + } + } + + const Shape &forget_layer_norm_coefficients_shape = forget_layer_norm_coefficients()->shape(); + LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.dim(0) == n_cell); + if (is_integer) + { + 
LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() == + loco::DataType::S16); + } + else + { + LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() == + loco::DataType::FLOAT32); + } + + const Shape &cell_layer_norm_coefficients_shape = cell_layer_norm_coefficients()->shape(); + LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() == loco::DataType::S16); + } + else + { + LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() == + loco::DataType::FLOAT32); + } + + const Shape &output_layer_norm_coefficients_shape = output_layer_norm_coefficients()->shape(); + LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.num_dims() == 1); + LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.dim(0) == n_cell); + if (is_integer) + { + LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() == + loco::DataType::S16); + } + else + { + LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() == + loco::DataType::FLOAT32); + } + } +} + +void UnidirectionalSequenceLSTM::configure() +{ + LUCI_INTERPRETER_CHECK(getInputTensors().size() == 24); + LUCI_INTERPRETER_CHECK(getOutputTensors().size() >= 1); + + // TODO support U8 + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32); + const bool is_integer = false; + const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr); + + // Inferring batch size, number of outputs and sequence length and + // number of cells from the input tensors. + const Shape &input_shape = input()->shape(); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 1); + const bool time_major = params().time_major; + const int n_batch = time_major ? 
input_shape.dim(1) : input_shape.dim(0); + // NOTE as dim(2) is accessed, we need to check this is valid + LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 2); + const int n_input = input_shape.dim(2); + + const Shape &input_to_output_weights_shape = input_to_output_weights()->shape(); + const int n_cell = input_to_output_weights_shape.dim(0); + LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.dim(1) == n_input); + + const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights()->shape(); + LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.dim(0) == n_cell); + + const int n_output = recurrent_to_output_weights_shape.dim(1); + + // Check that input tensor dimensions matches with each other. + check_input_tensor_dimensions(n_input, n_output, n_cell, use_layer_norm, is_integer); + + // Check the shape of input state tensors. + // These tensor may be 1D or 2D. It's fine as long as the total size is + // correct. + const Shape &output_state_shape = output_state()->shape(); + const Shape &cell_state_shape = cell_state()->shape(); + LUCI_INTERPRETER_CHECK(output_state_shape.num_elements() == n_batch * n_output); + LUCI_INTERPRETER_CHECK(cell_state_shape.num_elements() == n_batch * n_cell); + + // Resize the output tensors. + Shape output_shape = Shape(input_shape.num_dims()); + for (int i = 0; i < input_shape.num_dims() - 1; i++) + { + output_shape.dim(i) = input_shape.dim(i); + } + output_shape.dim(input_shape.num_dims() - 1) = n_output; + output()->resize(output_shape); + + // TODO import integer + + // output_state and cell_state are variable tensor; use scratchpad. 
+ getOutputTensors()[1]->resize(output_state_shape); + getOutputTensors()[2]->resize(cell_state_shape); + + const bool use_cifg = (input_to_input_weights() == nullptr); + if (use_cifg) + getOutputTensors()[3]->resize({n_batch, n_cell * 3}); + else + getOutputTensors()[3]->resize({n_batch, n_cell * 4}); + + // hybrid not supported + if (input_to_output_weights()->element_type() == loco::DataType::U8 && + input()->element_type() == loco::DataType::FLOAT32) + { + throw std::runtime_error("Hybrid type is not currently supported"); + } + // TODO support hybrid + // TODO support U8 +} + +void UnidirectionalSequenceLSTM::execute() const +{ + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type"); + } +} + +void UnidirectionalSequenceLSTM::evalFloat() const +{ + const bool time_major = params().time_major; + const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr); + + const Tensor *t_input_layer_norm_coefficients = + use_layer_norm ? input_layer_norm_coefficients() : nullptr; + const Tensor *t_forget_layer_norm_coefficients = + use_layer_norm ? forget_layer_norm_coefficients() : nullptr; + const Tensor *t_cell_layer_norm_coefficients = + use_layer_norm ? cell_layer_norm_coefficients() : nullptr; + const Tensor *t_output_layer_norm_coefficients = + use_layer_norm ? 
output_layer_norm_coefficients() : nullptr; + + Tensor *sp_output_state = getOutputTensors()[1]; + Tensor *sp_cell_state = getOutputTensors()[2]; + Tensor *sp_scratch_buffer = getOutputTensors()[3]; + + // Note: it is expected that output_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<float>(sp_output_state); + std::fill_n(scratchpad_data, sp_output_state->shape().num_elements(), 0); + scratchpad_data = getTensorData<float>(sp_cell_state); + std::fill_n(scratchpad_data, sp_cell_state->shape().num_elements(), 0); + scratchpad_data = getTensorData<float>(sp_scratch_buffer); + std::fill_n(scratchpad_data, sp_scratch_buffer->shape().num_elements(), 0); + + TfLiteLSTMParams lstm_params{}; + lstm_params.activation = getTfLiteActivation(params().activation); + lstm_params.cell_clip = params().cell_clip; + lstm_params.proj_clip = params().proj_clip; + lstm_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + + lstm::EvalFloat(input(), input_to_input_weights(), input_to_forget_weights(), + input_to_cell_weights(), input_to_output_weights(), + + recurrent_to_input_weights(), recurrent_to_forget_weights(), + recurrent_to_cell_weights(), recurrent_to_output_weights(), + + cell_to_input_weights(), cell_to_forget_weights(), cell_to_output_weights(), + + t_input_layer_norm_coefficients, t_forget_layer_norm_coefficients, + t_cell_layer_norm_coefficients, t_output_layer_norm_coefficients, + /*aux_input=*/nullptr, + /*aux_input_to_input_weights=*/nullptr, + /*aux_input_to_forget_weights=*/nullptr, + /*aux_input_to_cell_weights=*/nullptr, + /*aux_input_to_output_weights=*/nullptr, input_gate_bias(), forget_gate_bias(), + cell_gate_bias(), output_gate_bias(), + + projection_weights(), projection_bias(), &lstm_params, + /*forward_sequence=*/true, time_major, + /*output_offset=*/0, sp_scratch_buffer, sp_output_state, sp_cell_state, output()); +} + +} // namespace 
kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h new file mode 100644 index 000000000..b8125111b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H +#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class UnidirectionalSequenceLSTM : public KernelWithParams<UnidirectionalSequenceLSTMParams> +{ +public: + UnidirectionalSequenceLSTM( + const Tensor *input, + + const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights, + const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights, + + const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights, + const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights, + + const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights, + const Tensor *cell_to_output_weights, + + const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias, + const Tensor *output_gate_bias, + + const Tensor *projection_weights, const Tensor *projection_bias, + + const Tensor *output_state, const Tensor *cell_state, + + const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients, + const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients, + + Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3, + const UnidirectionalSequenceLSTMParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + + const Tensor *input_to_input_weights() const { return _inputs[1]; } + const Tensor *input_to_forget_weights() const { return _inputs[2]; } + const Tensor *input_to_cell_weights() const { return _inputs[3]; } + const Tensor *input_to_output_weights() const { return _inputs[4]; } + + const Tensor *recurrent_to_input_weights() const { return _inputs[5]; } + const Tensor *recurrent_to_forget_weights() const { return _inputs[6]; } + const Tensor *recurrent_to_cell_weights() const { return _inputs[7]; 
} + const Tensor *recurrent_to_output_weights() const { return _inputs[8]; } + + const Tensor *cell_to_input_weights() const { return _inputs[9]; } + const Tensor *cell_to_forget_weights() const { return _inputs[10]; } + const Tensor *cell_to_output_weights() const { return _inputs[11]; } + + const Tensor *input_gate_bias() const { return _inputs[12]; } + const Tensor *forget_gate_bias() const { return _inputs[13]; } + const Tensor *cell_gate_bias() const { return _inputs[14]; } + const Tensor *output_gate_bias() const { return _inputs[15]; } + + const Tensor *projection_weights() const { return _inputs[16]; } + const Tensor *projection_bias() const { return _inputs[17]; } + + const Tensor *output_state() const { return _inputs[18]; } + const Tensor *cell_state() const { return _inputs[19]; } + + const Tensor *input_layer_norm_coefficients() const { return _inputs[20]; } + const Tensor *forget_layer_norm_coefficients() const { return _inputs[21]; } + const Tensor *cell_layer_norm_coefficients() const { return _inputs[22]; } + const Tensor *output_layer_norm_coefficients() const { return _inputs[23]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + +private: + void check_input_tensor_dimensions(int n_input, int n_output, int n_cell, bool use_layer_norm, + bool is_integer); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp new file mode 100644 index 000000000..df059cfcc --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp @@ -0,0 +1,565 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/UnidirectionalSequenceLSTM.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class UnidirectionalSequenceLSTMTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest +TEST_F(UnidirectionalSequenceLSTMTest, FloatTest) +{ + const int32_t n_batch = 1; + const int32_t n_input = 2; + const int32_t n_cell = 4; + const int32_t n_output = 4; + const int32_t sequence_length = 3; + + std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + + std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284, 0.11810488, 0.2013163, + -0.20583314, 0.44344562, 0.22077113, -0.29909778}; + + std::vector<float> input_to_forget_weights = {0.09701663, 0.20334584, -0.50592935, -0.31343272, + -0.40032279, 0.44781327, 0.01387155, -0.35593212}; + + std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138, + 0.44272184, 0.03897077, -0.1556896, 0.19487578}; + + std::vector<float> input_gate_bias = {0., 0., 0., 0.}; + std::vector<float> 
forget_gate_bias = {1., 1., 1., 1.}; + std::vector<float> cell_gate_bias = {0., 0., 0., 0.}; + std::vector<float> output_gate_bias = {0., 0., 0., 0.}; + + std::vector<float> recurrent_to_input_weights = { + -0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324, + -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, -0.12528998, + 0.24077177, -0.51332325, -0.33502164, 0.10629296}; + + std::vector<float> recurrent_to_forget_weights = { + -0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892, + -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436, + 0.28053468, 0.01560611, -0.20127171, -0.01140004}; + + std::vector<float> recurrent_to_cell_weights = { + -0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841, + -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659, + -0.46367589, 0.26016325, -0.03894562, -0.16368064}; + + std::vector<float> recurrent_to_output_weights = { + 0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793, + 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421, + -0.51818722, -0.15390486, 0.0468148, 0.39922136}; + + Shape input_to_input_weights_shape{n_cell, n_input}; + Shape input_to_cell_weights_shape{n_cell, n_input}; + Shape input_to_forget_weights_shape{n_cell, n_input}; + Shape input_to_output_weights_shape{n_cell, n_input}; + + Shape input_gate_bias_shape{n_cell}; + Shape forget_gate_bias_shape{n_cell}; + Shape cell_gate_bias_shape{n_cell}; + Shape output_gate_bias_shape{n_cell}; + + Shape recurrent_to_input_weights_shape{n_cell, n_output}; + Shape recurrent_to_cell_weights_shape{n_cell, n_output}; + Shape recurrent_to_forget_weights_shape{n_cell, n_output}; + Shape recurrent_to_output_weights_shape{n_cell, n_output}; + + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + Tensor 
input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get()); + Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get()); + Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_output_weights_shape, input_to_output_weights, _memory_manager.get()); + + Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + input_gate_bias_shape, input_gate_bias, _memory_manager.get()); + Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + forget_gate_bias_shape, forget_gate_bias, _memory_manager.get()); + Tensor cell_gate_bias_tensor = + makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get()); + Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + output_gate_bias_shape, output_gate_bias, _memory_manager.get()); + + Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get()); + Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get()); + Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get()); + Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get()); + + std::vector<float> input_data{2., 3., 3., 4., 1., 1.}; + Shape input_shape{sequence_length, n_batch, n_input}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + + Shape output_state_shape{n_batch, n_output}; + Tensor output_state_tensor = 
makeOutputTensor(DataType::FLOAT32); + output_state_tensor.resize(output_state_shape); + + Shape cell_state_shape{n_batch, n_cell}; + Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32); + cell_state_tensor.resize(cell_state_shape); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 0.0; + params.proj_clip = 0.0; + params.time_major = true; + params.asymmetric_quantize_inputs = false; + + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor, + &input_to_cell_weights_tensor, &input_to_output_weights_tensor, + &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor, + &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr, + nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor, + &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr, + nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(output_state_tensor); + _memory_manager->allocate_memory(cell_state_tensor); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + kernel.execute(); + + std::vector<float> ref_output_data{-0.02973187, 0.1229473, 0.20885126, -0.15358765, + -0.03716109, 0.12507336, 0.41193449, -0.20860538, + -0.15053082, 0.09120187, 0.24278517, -0.12222792}; + + std::vector<float> ref_output_shape{sequence_length, n_batch, n_output}; + const float tolerance = 1e-5; 
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch) +{ + const int32_t n_batch = 1; + const int32_t n_input = 2; + const int32_t n_cell = 4; + const int32_t n_output = 4; + const int32_t sequence_length = 3; + + std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + + std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284, 0.11810488, 0.2013163, + -0.20583314, 0.44344562, 0.22077113, -0.29909778}; + + std::vector<float> input_to_forget_weights = {0.09701663, 0.20334584, -0.50592935, -0.31343272, + -0.40032279, 0.44781327, 0.01387155, -0.35593212}; + + std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138, + 0.44272184, 0.03897077, -0.1556896, 0.19487578}; + + std::vector<float> input_gate_bias = {0., 0., 0., 0.}; + std::vector<float> forget_gate_bias = {1., 1., 1., 1.}; + std::vector<float> cell_gate_bias = {0., 0., 0., 0.}; + std::vector<float> output_gate_bias = {0., 0., 0., 0.}; + + std::vector<float> recurrent_to_input_weights = { + -0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324, + -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, -0.12528998, + 0.24077177, -0.51332325, -0.33502164, 0.10629296}; + + std::vector<float> recurrent_to_forget_weights = { + -0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892, + -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436, + 0.28053468, 0.01560611, -0.20127171, -0.01140004}; + + std::vector<float> recurrent_to_cell_weights = { + -0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841, + -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659, + -0.46367589, 0.26016325, 
-0.03894562, -0.16368064}; + + std::vector<float> recurrent_to_output_weights = { + 0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793, + 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421, + -0.51818722, -0.15390486, 0.0468148, 0.39922136}; + + Shape input_to_input_weights_shape{n_cell, n_input}; + Shape input_to_cell_weights_shape{n_cell, n_input}; + Shape input_to_forget_weights_shape{n_cell, n_input}; + Shape input_to_output_weights_shape{n_cell, n_input}; + + Shape input_gate_bias_shape{n_cell}; + Shape forget_gate_bias_shape{n_cell}; + Shape cell_gate_bias_shape{n_cell}; + Shape output_gate_bias_shape{n_cell}; + + Shape recurrent_to_input_weights_shape{n_cell, n_output}; + Shape recurrent_to_cell_weights_shape{n_cell, n_output}; + Shape recurrent_to_forget_weights_shape{n_cell, n_output}; + Shape recurrent_to_output_weights_shape{n_cell, n_output}; + + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get()); + Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get()); + Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_output_weights_shape, input_to_output_weights, _memory_manager.get()); + + Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + input_gate_bias_shape, input_gate_bias, _memory_manager.get()); + Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + forget_gate_bias_shape, forget_gate_bias, _memory_manager.get()); + Tensor cell_gate_bias_tensor = + makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get()); + Tensor output_gate_bias_tensor = 
makeInputTensor<DataType::FLOAT32>( + output_gate_bias_shape, output_gate_bias, _memory_manager.get()); + + Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get()); + Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get()); + Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get()); + Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get()); + + std::vector<float> input_data{2., 3., 3., 4., 1., 1.}; + Shape input_shape{n_batch, sequence_length, n_input}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + + Shape output_state_shape{n_batch, n_output}; + Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32); + output_state_tensor.resize(output_state_shape); + + Shape cell_state_shape{n_batch, n_cell}; + Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32); + cell_state_tensor.resize(cell_state_shape); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 0.0; + params.proj_clip = 0.0; + params.time_major = false; + params.asymmetric_quantize_inputs = false; + + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor, + &input_to_cell_weights_tensor, &input_to_output_weights_tensor, + &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor, + &recurrent_to_cell_weights_tensor, 
&recurrent_to_output_weights_tensor, nullptr, nullptr, + nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor, + &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr, + nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor, + &scratchpad_1, params); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(output_state_tensor); + _memory_manager->allocate_memory(cell_state_tensor); + _memory_manager->allocate_memory(scratchpad_1); + kernel.execute(); + + std::vector<float> ref_output_data{-0.02973187, 0.1229473, 0.20885126, -0.15358765, + -0.03716109, 0.12507336, 0.41193449, -0.20860538, + -0.15053082, 0.09120187, 0.24278517, -0.12222792}; + + std::vector<float> ref_output_shape{n_batch, sequence_length, n_output}; + const float tolerance = 1e-5; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple) +{ + const int32_t n_batch = 1; + const int32_t n_input = 1; + const int32_t n_cell = 1; + const int32_t n_output = 1; + const int32_t sequence_length = 1; + + std::vector<float> input_to_input_weights = {0.329067}; + std::vector<float> input_to_forget_weights = {0.308059}; + std::vector<float> input_to_cell_weights = {0.152916}; + std::vector<float> input_to_output_weights = {-0.476033}; + + std::vector<float> input_gate_bias = {0.}; + std::vector<float> forget_gate_bias = {1.}; + std::vector<float> cell_gate_bias = {0.}; + std::vector<float> output_gate_bias = {0.}; + + std::vector<float> recurrent_to_input_weights = {0.207806}; + std::vector<float> recurrent_to_forget_weights = {0.028718}; + std::vector<float> recurrent_to_cell_weights = {-0.182756}; + std::vector<float> recurrent_to_output_weights = {-0.960517}; + 
+ Shape input_to_input_weights_shape{n_cell, n_input}; + Shape input_to_cell_weights_shape{n_cell, n_input}; + Shape input_to_forget_weights_shape{n_cell, n_input}; + Shape input_to_output_weights_shape{n_cell, n_input}; + + Shape input_gate_bias_shape{n_cell}; + Shape forget_gate_bias_shape{n_cell}; + Shape cell_gate_bias_shape{n_cell}; + Shape output_gate_bias_shape{n_cell}; + + Shape recurrent_to_input_weights_shape{n_cell, n_output}; + Shape recurrent_to_cell_weights_shape{n_cell, n_output}; + Shape recurrent_to_forget_weights_shape{n_cell, n_output}; + Shape recurrent_to_output_weights_shape{n_cell, n_output}; + + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get()); + Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get()); + Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_output_weights_shape, input_to_output_weights, _memory_manager.get()); + + Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + input_gate_bias_shape, input_gate_bias, _memory_manager.get()); + Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + forget_gate_bias_shape, forget_gate_bias, _memory_manager.get()); + Tensor cell_gate_bias_tensor = + makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get()); + Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>( + output_gate_bias_shape, output_gate_bias, _memory_manager.get()); + + Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get()); + Tensor 
recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get()); + Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get()); + Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>( + recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get()); + + std::vector<float> input_data{0.03653763}; + Shape input_shape{n_batch, sequence_length, n_input}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + + Shape output_state_shape{n_batch, n_output}; + Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32); + output_state_tensor.resize(output_state_shape); + + Shape cell_state_shape{n_batch, n_cell}; + Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32); + cell_state_tensor.resize(cell_state_shape); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 10.0; + params.proj_clip = 0.0; + params.time_major = false; + params.asymmetric_quantize_inputs = false; + + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor, + &input_to_cell_weights_tensor, &input_to_output_weights_tensor, + &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor, + &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr, + nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor, + &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr, + nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, 
&cell_state_tensor, + &scratchpad_1, params); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(output_state_tensor); + _memory_manager->allocate_memory(cell_state_tensor); + _memory_manager->allocate_memory(scratchpad_1); + kernel.execute(); + + std::vector<float> ref_output_data{0.00139296}; + std::vector<float> ref_output_shape{n_batch, sequence_length, n_output}; + const float tolerance = 1e-5; + auto aa = extractTensorData<float>(output_tensor); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG) +{ + const int32_t n_batch = 1; + const int32_t n_input = 2; + const int32_t n_cell = 4; + const int32_t n_output = 4; + const int32_t sequence_length = 3; + + std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 is not support as of now + Shape input_shape{sequence_length, n_batch, n_input}; + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get()); + + std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + Shape input_to_input_weights_shape{n_cell, n_input}; + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 0.0; + params.proj_clip = 0.0; + params.time_major = true; + 
params.asymmetric_quantize_inputs = false; + + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr, + nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG) +{ + const int32_t n_batch = 1; + const int32_t n_input = 2; + const int32_t n_cell = 4; + const int32_t n_output = 4; + const int32_t sequence_length = 3; + + std::vector<float> input_data{2., 3., 3., 4., 1., 1.}; + Shape input_shape{sequence_length, n_input}; // this is wrong + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + + std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + Shape input_to_input_weights_shape{n_cell, n_input}; + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 0.0; + params.proj_clip = 0.0; + params.time_major = true; + 
params.asymmetric_quantize_inputs = false; + + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr, + nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG) +{ + const int32_t n_batch = 1; + const int32_t n_input = 2; + const int32_t n_cell = 4; + const int32_t n_output = 4; + const int32_t sequence_length = 3; + + std::vector<float> input_data{2., 3., 3., 4., 1., 1.}; + Shape input_shape{sequence_length, n_batch, n_input}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + + std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + Shape input_to_input_weights_shape{n_cell, n_input}; + Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>( + input_to_input_weights_shape, input_to_input_weights, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = Activation::TANH; + params.cell_clip = 0.0; + params.proj_clip = 0.0; + params.time_major = true; + 
params.asymmetric_quantize_inputs = false; + + // NOTE provide wrong shaped inputs + UnidirectionalSequenceLSTM kernel( + &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, + &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr, + nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-interpreter/src/kernels/Unpack.cpp index 834b79926..9127241c0 100644 --- a/compiler/luci-interpreter/src/kernels/Unpack.cpp +++ b/compiler/luci-interpreter/src/kernels/Unpack.cpp @@ -29,7 +29,7 @@ namespace kernels { Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams ¶ms) - : KernelWithParams<UnpackParams>({input}, std::move(outputs), params) + : KernelWithParams<UnpackParams>({input}, std::move(outputs), params) { } diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp index f70c5847a..9384ddc83 100644 --- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Unpack.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,10 +33,12 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data, const std::vector<std::initializer_list<int32_t>> &exp_output_shape, 
std::vector<std::initializer_list<T>> exp_output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<Tensor> output_tensors; output_tensors.reserve(num_outputs); for (int i = 0; i < num_outputs; ++i) @@ -54,6 +57,10 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data, Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params); kernel.configure(); + for (int i = 0; i < num_outputs; i++) + { + memory_manager->allocate_memory(output_tensors[i]); + } kernel.execute(); for (int i = 0; i < num_outputs; ++i) @@ -68,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test }; using DataTypes = ::testing::Types<float, uint8_t>; -TYPED_TEST_CASE(UnpackTest, DataTypes); +TYPED_TEST_SUITE(UnpackTest, DataTypes); TYPED_TEST(UnpackTest, ThreeOutputs) { @@ -121,11 +128,11 @@ TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs) TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs) { Check<TypeParam>( - /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1}, - /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}}, - /*exp_output_data=*/ - {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}}); + /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}}, + /*exp_output_data=*/ + {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}}); } TYPED_TEST(UnpackTest, VectorToScalar) diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp 
b/compiler/luci-interpreter/src/kernels/Utils.cpp index b9e7738a9..a04dbcc0f 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.cpp +++ b/compiler/luci-interpreter/src/kernels/Utils.cpp @@ -27,17 +27,39 @@ namespace luci_interpreter namespace kernels { -void calculateActivationRange(Activation activation, float *activation_min, float *activation_max) +TfLiteFusedActivation getTfLiteActivation(Activation activation) +{ + switch (activation) + { + case luci::FusedActFunc::RELU: + return kTfLiteActRelu; + case luci::FusedActFunc::RELU6: + return kTfLiteActRelu6; + case luci::FusedActFunc::RELU_N1_TO_1: + return kTfLiteActReluN1To1; + case luci::FusedActFunc::TANH: + return kTfLiteActTanh; + case luci::FusedActFunc::SIGN_BIT: + return kTfLiteActSignBit; + case luci::FusedActFunc::NONE: + return kTfLiteActNone; + default: + throw std::runtime_error("Unsupported activation type"); + } +} + +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max) { switch (activation) { case Activation::NONE: - *activation_min = std::numeric_limits<float>::lowest(); - *activation_max = std::numeric_limits<float>::max(); + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); break; case Activation::RELU: *activation_min = 0; - *activation_max = std::numeric_limits<float>::max(); + *activation_max = std::numeric_limits<T>::max(); break; case Activation::RELU_N1_TO_1: *activation_min = -1; @@ -52,6 +74,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa } } +template void calculateActivationRange(Activation activation, float *activation_min, + float *activation_max); +template void calculateActivationRange(Activation activation, int32_t *activation_min, + int32_t *activation_max); +template void calculateActivationRange(Activation activation, int64_t *activation_min, + int64_t *activation_max); + static void 
calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax, const Tensor *output, int32_t *activation_min, int32_t *activation_max) @@ -66,6 +95,7 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t switch (activation) { case Activation::NONE: + case Activation::TANH: *activation_min = qmin; *activation_max = qmax; break; @@ -89,20 +119,23 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max) { + assert(output->zero_points().size() == 1); int32_t qmin{}; int32_t qmax{}; switch (output->element_type()) { case DataType::U8: - qmin = std::numeric_limits<uint8_t>::min(); + qmin = 0; qmax = std::numeric_limits<uint8_t>::max(); break; case DataType::S8: - qmin = std::numeric_limits<int8_t>::min(); + qmin = -std::numeric_limits<int8_t>::max(); qmax = std::numeric_limits<int8_t>::max(); break; case DataType::S16: - qmin = std::numeric_limits<int16_t>::min(); + // For now, assume that signed int16 type implies signed symmetric quantization. + assert(output->zero_point() == 0); + qmin = -std::numeric_limits<int16_t>::max(); qmax = std::numeric_limits<int16_t>::max(); break; default: @@ -171,7 +204,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_ { const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1; const int32_t input2_dim = i < num_input2_dims ? 
input2_shape.dim(num_input2_dims - i - 1) : 1; - assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1); + + bool need_broadcast = input1_dim != input2_dim; + bool can_broadcast = input1_dim == 1 || input2_dim == 1; + LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast); + output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim); } diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index 7927151c6..e975585cd 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -21,10 +21,12 @@ #include "core/KernelParams.h" #include "luci_interpreter/core/Tensor.h" +#include <tensorflow/lite/kernels/internal/tensor_utils.h> #include <tensorflow/lite/kernels/internal/types.h> #include <cassert> #include <cstdint> +#include <stdexcept> namespace luci_interpreter { @@ -70,11 +72,49 @@ inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t fi } } -void calculateActivationRange(Activation activation, float *activation_min, float *activation_max); +inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3) +{ + return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3; +} + +TfLiteFusedActivation getTfLiteActivation(Activation activation); + +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max); void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max); +template <typename T> constexpr bool one_of_types() { return false; } + +// Checks if T is equal to one of {U,Other} types +template <typename T, typename U, typename... 
Other> constexpr bool one_of_types() +{ + return std::is_same<T, U>::value || one_of_types<T, Other...>(); +} + +/** + * Fills activation min and max parameters depending on given data type and activation + * + * T is a template parameter, so after optimization this code left with only required if case + * + * @tparam T data type of arithmetic operation output tensor + * @param params tflite params to fill + * @param activation luci_interpreter::Activation of arithmetic operation + */ +template <typename T> +void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act) +{ + static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype"); + + if (std::is_same<T, float>::value) + calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max); + if (std::is_same<T, int32_t>::value) + calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max); + else + calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max); +} + // Decompose a double multiplier into a Q0.31 int32 representation of its // significand, and shift representation of its exponent. 
// @@ -94,6 +134,63 @@ void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quan Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape); +inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, + float output_scale) +{ + const double input_product_scale = static_cast<double>(input_scale * filter_scale); + LUCI_INTERPRETER_CHECK(input_product_scale >= 0); + return input_product_scale / static_cast<double>(output_scale); +} + +// TODO rename getQuantizedConvolutionMultiplers to something more general +// it is used for non conv operators too +inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale, + const std::vector<float> &filter_scale, + float output_scale) +{ + std::vector<double> effective_output_scales; + size_t n = filter_scale.size(); + effective_output_scales.reserve(n); + for (size_t i = 0; i < n; ++i) + { + effective_output_scales.push_back( + getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale)); + } + return effective_output_scales; +} + +struct ChannelQuantMultipliers +{ + int shift; + int32_t multiplier; + ChannelQuantMultipliers() = default; +}; + +inline std::vector<ChannelQuantMultipliers> +quantizeMultipliers(const std::vector<double> &effective_scale) +{ + size_t n = effective_scale.size(); + std::vector<ChannelQuantMultipliers> params(n); + for (size_t i = 0; i < n; ++i) + { + quantizeMultiplier(effective_scale[i], ¶ms[i].multiplier, ¶ms[i].shift); + } + return params; +} + +// Helper wrapper to hide broadcast logic +template <typename T> class BroadcastableWrapper +{ +public: + BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 
0 : 1) {} + + T operator[](int idx) { return _v[idx * _stride]; } + +private: + const std::vector<T> &_v; + int _stride; +}; + inline tflite::RuntimeShape getTensorShape(const Tensor *tensor) { if (tensor == nullptr) @@ -176,7 +273,7 @@ public: // Build with the tensors in 'tensor_list'. explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list) - : VectorOfTensors<uint8_t, is_const>(tensor_list) + : VectorOfTensors<uint8_t, is_const>(tensor_list) { for (TensorT *tensor : tensor_list) { diff --git a/compiler/luci-interpreter/src/kernels/While.cpp b/compiler/luci-interpreter/src/kernels/While.cpp new file mode 100644 index 000000000..153bd1a99 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/While.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/While.h" +#include "kernels/Utils.h" + +#include <cstring> + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst) +{ + for (size_t i = 0; i < src.size(); ++i) + { + LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type()); + dst[i]->resize(src[i]->shape()); + + const int32_t num_elements = src[i]->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(src[i]->element_type()); + std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size); + } +} + +void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst) +{ + std::vector<const Tensor *> const_src; + for (const auto &t : src) + const_src.push_back(t); + copy(const_src, dst); +} + +// TODO: Think about how allocate memory for output in main graph +void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph) +{ + for (auto tensor : tensors) + run_graph->configureAllocations(tensor); +} + +} // namespace + +While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, + RuntimeGraph *cond_graph, RuntimeGraph *body_graph) + : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph) +{ +} + +void While::configure() +{ + LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size()); + LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size()); + LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size()); + + LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size()); + + const auto &cond_outputs = _cond_graph->getOutputTensors(); + LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1) + LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL); +} + +/** + * @note Dynamic shape such as {1, 0, 
8} may fail in tensor->data() + */ +void While::execute() const +{ + const auto &cond_inputs = _cond_graph->getInputTensors(); + const auto &cond_outputs = _cond_graph->getOutputTensors(); + + configureTensorsAllocations(cond_inputs, _cond_graph); + + copy(getInputTensors(), cond_inputs); + + const auto &body_inputs = _body_graph->getInputTensors(); + const auto &body_outputs = _body_graph->getOutputTensors(); + + configureTensorsAllocations(body_inputs, _body_graph); + + while (true) + { + _cond_graph->execute(); + + bool cond_value = cond_outputs[0]->data<bool>()[0]; + if (!cond_value) + break; + + copy(cond_inputs, body_inputs); + + _body_graph->execute(); + + copy(body_outputs, cond_inputs); + } + + copy(cond_inputs, getOutputTensors()); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/While.h b/compiler/luci-interpreter/src/kernels/While.h new file mode 100644 index 000000000..f758df3f3 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/While.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H +#define LUCI_INTERPRETER_KERNELS_WHILE_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class While : public Kernel +{ +public: + While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph, + RuntimeGraph *body_graph); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + RuntimeGraph *const _cond_graph = nullptr; + RuntimeGraph *const _body_graph = nullptr; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_WHILE_H diff --git a/compiler/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-interpreter/src/kernels/While.test.cpp new file mode 100644 index 000000000..cb8f89130 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/While.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "core/RuntimeModule.h" +#include "kernels/Add.h" +#include "kernels/Less.h" +#include "kernels/While.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + graph->addKernel(std::make_unique<Less>(input, input_cond, output)); + + return graph; +} + +RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + AddParams params{}; + params.activation = Activation::NONE; + graph->addKernel(std::make_unique<Add>(input, input_add, output, params)); + + return graph; +} + +TEST(WhileTest, FloatLoop10) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + Tensor input_cond = 
makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get()); + Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); + + RuntimeModule module(nullptr); + RuntimeGraph *cond_graph = + buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get()); + RuntimeGraph *body_graph = + buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get()); + + While kernel({&input}, {&output}, cond_graph, body_graph); + kernel.configure(); + memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt index d99485d06..292771592 100644 --- a/compiler/luci-interpreter/src/loader/CMakeLists.txt +++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt @@ -1,22 +1,39 @@ -nnas_find_package(GTest REQUIRED) - set(SOURCES GraphLoader.h GraphLoader.cpp + KernelBuilderHelper.h + KernelBuilderHelper.cpp KernelBuilder.h KernelBuilder.cpp ModuleLoader.h ModuleLoader.cpp - RuntimeToIR.h) + RuntimeToIR.h + nodes/Builders.h) + +# include kernel specific builders +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "nodes/${NODE}.cpp") +endmacro(REGISTER_KERNEL) +include(${KERNEL_REGISTER_FILE}) + +add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}") +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") -add_library(luci_interpreter_loader STATIC ${SOURCES}) -set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON) 
-target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter_loader - PUBLIC luci_lang luci_interpreter_core - PRIVATE luci_interpreter_kernels nncc_common) +target_link_libraries(${LUCI_INTERPRETER_LOADER} + PUBLIC luci_lang ${LUCI_INTERPRETER_CORE} + PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) set(TEST_SOURCES KernelBuilder.test.cpp) -GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES}) -target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader) +GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER}) diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index 95c654769..ba99a579b 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -18,6 +18,7 @@ #include "loader/KernelBuilder.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> #include <loco/IR/Algorithm.h> namespace luci_interpreter @@ -57,13 +58,41 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size) return getNodeDataImpl<DataType::U8>(node, data_size); case DataType::FLOAT32: return getNodeDataImpl<DataType::FLOAT32>(node, data_size); + case DataType::S8: + return getNodeDataImpl<DataType::S8>(node, data_size); + case DataType::S16: + return getNodeDataImpl<DataType::S16>(node, data_size); case DataType::S32: return getNodeDataImpl<DataType::S32>(node, data_size); + case DataType::S64: + return getNodeDataImpl<DataType::S64>(node, data_size); + case DataType::BOOL: + return getNodeDataImpl<DataType::BOOL>(node, data_size); default: throw std::runtime_error("Unsupported type."); } } +const void *getNodeData(const luci::CircleCustom *node, size_t 
*data_size) +{ + if (node->custom_code() != "CircleReferencingConst") + return nullptr; + + // helper struct which describes data loaded to custom_options of CircleReferencingConst node + // TODO move this struct to header + struct ConstDataReference + { + const uint8_t *data = nullptr; + uint32_t size = 0; + }; + + const auto &custom_options = node->custom_options(); + const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data()); + + *data_size = const_data_ref.size; + return const_data_ref.data; +} + bool isExecutableNode(const luci::CircleNode *node) { switch (node->opcode()) @@ -74,10 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node) case luci::CircleOpcode::CIRCLEOUTPUT: case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE: // The following nodes denote outputs of multiple-output nodes. + case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT: + case luci::CircleOpcode::CIRCLECUSTOMOUT: case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT: case luci::CircleOpcode::CIRCLESPLITOUT: + case luci::CircleOpcode::CIRCLESPLITVOUT: + case luci::CircleOpcode::CIRCLETOPKV2OUT: + case luci::CircleOpcode::CIRCLEUNIQUEOUT: case luci::CircleOpcode::CIRCLEUNPACKOUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + case luci::CircleOpcode::CIRCLEWHILEOUT: return false; + // Custom nodes may be executable and non-executable + case luci::CircleOpcode::CUSTOM: + { + auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more non-executable Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return false; + + return true; + } default: return true; } @@ -91,23 +140,43 @@ bool isTensorProducingNode(const luci::CircleNode *node) case luci::CircleOpcode::CIRCLEOUTPUT: // The following nodes are multiple-output nodes. 
They do not produce tensors, the tensors // are produced by the corresponding *Out nodes instead. + case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM: + case luci::CircleOpcode::CUSTOM: case luci::CircleOpcode::IF: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5: case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::SPLIT_V: + case luci::CircleOpcode::TOPK_V2: + case luci::CircleOpcode::UNIQUE: case luci::CircleOpcode::UNPACK: + case luci::CircleOpcode::WHILE: return false; default: return true; } } +bool isSupportedCustomNode(const luci::CircleNode *node) +{ + const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return true; + + return false; +} + } // namespace GraphLoader::GraphLoader( - const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir), - _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) + const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager) + : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir), + _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor), + _memory_manager(memory_manager) { } @@ -117,24 +186,36 @@ void GraphLoader::loadTensors() { const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i)); + if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) + { 
+ const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node); + throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " + + node->name()); + } + if (!isTensorProducingNode(node)) continue; - // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred. + // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will + // be inferred. Shape shape{}; - if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node)) + switch (node->opcode()) { - shape = getNodeShape(input_node); - } - else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node)) - { - shape = getNodeShape(const_node); + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + shape = getNodeShape(node); + break; + default: + break; } AffineQuantization quantization; if (node->quantparam() != nullptr) { const luci::CircleQuantParam *params = node->quantparam(); + assert(params->scale.size() == params->zerop.size()); quantization.scale.assign(params->scale.cbegin(), params->scale.cend()); quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend()); quantization.quantized_dimension = params->quantized_dimension; @@ -143,12 +224,40 @@ void GraphLoader::loadTensors() auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization), node->name()); + // If node has execution plan then read memory offsets for nodes + // from the beginning of shared memory buffer. Used in Static Memory Manager. 
+ if (luci::has_execution_plan(node)) + { + auto execution_plan = luci::get_execution_plan(node); + assert(!execution_plan.offsets().empty()); + tensor->set_offset(execution_plan.offsets().front()); + } + if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node)) { size_t data_size{}; const void *const_data = getNodeData(const_node, &data_size); if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); + } + } + else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node)) + { + const auto *custom_node = + loco::must_cast<const luci::CircleCustom *>(custom_out_node->input()); + + if (custom_node->custom_code() == "CircleReferencingConst") + { + size_t data_size{}; + const void *const_data = getNodeData(custom_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } } _node_to_tensor.emplace(node, tensor.get()); @@ -165,6 +274,7 @@ void GraphLoader::initInputOutputTensors() const for (size_t i = 0; i < input_nodes.size(); ++i) { input_tensors[i] = _node_to_tensor.at(input_nodes[i]); + _memory_manager->allocate_memory(*input_tensors[i]); } _runtime_graph->setInputTensors(input_tensors); @@ -183,16 +293,54 @@ void GraphLoader::loadOperators() KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor); // Create kernels for executable nodes. This has to be done in execution order. - for (const loco::Node *loco_node : - loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)))) + auto graph = const_cast<loco::Graph *>(_graph); + + auto const graph_nodes = loco::all_nodes(graph); + + // Checking for execution plan in node annotations. 
+ bool has_execution_annotation = true; + auto const checking_exec_plan = [&has_execution_annotation](auto const node) { + const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node); + if (!luci::has_execution_plan(circle_node)) + has_execution_annotation = false; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan); + + if (has_execution_annotation) { - const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node); + // Build ordered_nodes vector that stores the order of execution of graph nodes. + std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size()); + + auto const filler = [&ordered_nodes](auto const node) { + const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node); + auto const position = luci::get_execution_plan(circle_node).order_in_plan(); + ordered_nodes.at(position) = circle_node; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), filler); - if (isExecutableNode(node)) + for (auto node : ordered_nodes) + { + if (isExecutableNode(node)) + { + std::unique_ptr<Kernel> kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } + else + { + // If it is impossible to build the execution order plan, + // then we use the default postorder_traversal approach. 
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph))) { - std::unique_ptr<Kernel> kernel = node->accept(&kernel_builder); - _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); - _runtime_graph->addKernel(std::move(kernel)); + const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node); + if (isExecutableNode(node)) + { + std::unique_ptr<Kernel> kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } } } } diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-interpreter/src/loader/GraphLoader.h index 89c5bcad7..fe066ecf8 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.h +++ b/compiler/luci-interpreter/src/loader/GraphLoader.h @@ -19,6 +19,7 @@ #include "core/RuntimeGraph.h" #include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" #include <loco/IR/Graph.h> @@ -32,7 +33,8 @@ class GraphLoader public: GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); void loadTensors(); void initInputOutputTensors() const; @@ -42,6 +44,7 @@ private: const loco::Graph *_graph; RuntimeGraph *_runtime_graph; RuntimeToIR &_runtime_to_ir; + IMemoryManager *_memory_manager; const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp index 126a1cb5b..c1e2c630a 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp +++ 
b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp @@ -15,586 +15,118 @@ */ #include "loader/KernelBuilder.h" +#include "loader/nodes/Builders.h" -#include "kernels/Add.h" -#include "kernels/ArgMax.h" -#include "kernels/AveragePool2D.h" -#include "kernels/Concatenation.h" -#include "kernels/Conv2D.h" -#include "kernels/DepthToSpace.h" -#include "kernels/DepthwiseConv2D.h" -#include "kernels/Elu.h" -#include "kernels/FullyConnected.h" -#include "kernels/If.h" -#include "kernels/L2Normalize.h" -#include "kernels/L2Pool2D.h" -#include "kernels/LeakyRelu.h" -#include "kernels/LocalResponseNormalization.h" -#include "kernels/Logistic.h" -#include "kernels/MaxPool2D.h" -#include "kernels/Mean.h" -#include "kernels/Mul.h" -#include "kernels/Pad.h" -#include "kernels/Reshape.h" -#include "kernels/Reverse.h" -#include "kernels/Rsqrt.h" -#include "kernels/Slice.h" -#include "kernels/Softmax.h" -#include "kernels/SpaceToDepth.h" -#include "kernels/Split.h" -#include "kernels/StridedSlice.h" -#include "kernels/Sqrt.h" -#include "kernels/Squeeze.h" -#include "kernels/Tanh.h" -#include "kernels/Unpack.h" -#include "kernels/Transpose.h" -#include "kernels/TransposeConv.h" +#include <luci/IR/CircleOpcode.h> +#include <luci/IR/CircleNodeDecl.h> #include <stdexcept> -namespace luci_interpreter -{ - -template <typename CircleNodeOut> -static std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node) +namespace { - std::vector<const CircleNodeOut *> output_nodes; - for (const loco::Node *loco_node : loco::succs(node)) - { - output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node)); - } - std::sort(output_nodes.begin(), output_nodes.end(), - [](const CircleNodeOut *node1, const CircleNodeOut *node2) { - return node1->index() < node2->index(); - }); - return {output_nodes.cbegin(), output_nodes.cend()}; -} -const Tensor *KernelBuilder::getInputTensor(const loco::Node *node) const +// TODO Extract this helper function +const std::string 
toString(luci::CircleOpcode opcode) { - const Tensor *tensor = _node_to_tensor.at(node); - assert(tensor != nullptr); - return tensor; -} + static const char *names[] = { +#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS, +#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS, +#include <luci/IR/CircleNodes.lst> +#undef CIRCLE_NODE +#undef CIRCLE_VNODE + }; -const Tensor *KernelBuilder::getOptionalInputTensor(const loco::Node *node) const -{ - if (dynamic_cast<const luci::CircleOutputExclude *>(node)) - { - return nullptr; - } - return getInputTensor(node); -} + auto const node_name = names[static_cast<int>(opcode)]; -Tensor *KernelBuilder::getOutputTensor(const loco::Node *node) const -{ - Tensor *tensor = _node_to_tensor.at(node); - assert(tensor != nullptr); - return tensor; -} + assert(std::string(node_name).substr(0, 6) == "Circle"); // FIX_ME_UNLESS -std::vector<Tensor *> -KernelBuilder::getOutputTensors(const std::vector<const loco::Node *> &nodes) const -{ - std::vector<Tensor *> tensors; - tensors.reserve(nodes.size()); - for (const loco::Node *node : nodes) - tensors.push_back(getOutputTensor(node)); - return tensors; + // Return substring of class name ("Circle" is sliced out) + // Ex: Return "Conv2D" for "CircleConv2D" node + return std::string(node_name).substr(6); } -RuntimeGraph *KernelBuilder::getRuntimeGraph(const loco::Graph *graph) const -{ - RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); - assert(runtime_graph != nullptr); - return runtime_graph; -} +} // namespace -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAdd *node) +namespace luci_interpreter { - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - AddParams params{}; - params.activation = node->fusedActivationFunction(); +#define CIRCLE_NODE(OPCODE, CLASS) CLASS, +#define CIRCLE_VNODE(OPCODE, CLASS) CLASS, - return 
std::make_unique<kernels::Add>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleArgMax *node) +// This enum is auxiliary. +// It is duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE, +// because list of target operators is in format of CLASS names +enum class BuilderId { - assert(node->arity() == 2); - const Tensor *input = getInputTensor(node->input()); - const Tensor *axis = getInputTensor(node->dimension()); - Tensor *output = getOutputTensor(node); +#include <luci/IR/CircleNodes.lst> + Size // casts to count of values in BuilderId enum +}; - ArgMaxParams params{}; - params.output_type = node->output_type(); +#undef CIRCLE_VNODE +#undef CIRCLE_NODE - return std::make_unique<kernels::ArgMax>(input, axis, output, params); -} +/** + * @brief Registry of kernel builders + * + * This class contains mapping from Opcodes to kernel builder functions + */ -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAveragePool2D *node) +class KernelBuilderRegistry { - assert(node->arity() == 1); +public: + using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *, + KernelBuilderHelper &); - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); + KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr) + { +#define REGISTER_KERNEL(name) \ + register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name); - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); +#include "KernelsToBuild.lst" - return std::make_unique<kernels::AveragePool2D>(input, output, params); -} +#undef REGISTER_KERNEL + } -std::unique_ptr<Kernel> KernelBuilder::visit(const 
luci::CircleConcatenation *node) -{ - std::vector<const Tensor *> inputs(node->numValues()); - for (uint32_t i = 0; i < node->numValues(); ++i) + KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const { - inputs[i] = getInputTensor(node->values(i)); + return _operator_builders.at(size_t(opcode)); } - Tensor *output = getOutputTensor(node); - - ConcatenationParams params{}; - params.axis = node->axis(); - - return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConst *) -{ - throw std::runtime_error("Const node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConv2D *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *filter = getInputTensor(node->filter()); - const Tensor *bias = getInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - - Conv2DParams params{}; - params.padding = node->padding(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.dilation_height_factor = node->dilation()->h(); - params.dilation_width_factor = node->dilation()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthToSpace *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - DepthToSpaceParams params{}; - params.block_size = node->block_size(); - - return std::make_unique<kernels::DepthToSpace>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthwiseConv2D *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *filter = getInputTensor(node->filter()); - 
const Tensor *bias = getInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - DepthwiseConv2DParams params{}; - params.padding = node->padding(); - params.depth_multiplier = node->depthMultiplier(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.dilation_height_factor = node->dilation()->h(); - params.dilation_width_factor = node->dilation()->w(); - params.activation = node->fusedActivationFunction(); +private: + std::vector<KernelBuilderFunc *> _operator_builders; - return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleElu *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Elu>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleFullyConnected *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *weights = getInputTensor(node->weights()); - const Tensor *bias = getOptionalInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - - FullyConnectedParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleIf *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); - assert(node->arity() == 1 + node->input_count()); - assert(output_nodes.size() == static_cast<size_t>(node->output_count())); - - const Tensor *cond = getInputTensor(node->cond()); - std::vector<const Tensor *> inputs(node->input_count()); - for (uint32_t i = 0; i < node->input_count(); ++i) + void register_kernel_builder(BuilderId id, KernelBuilderFunc *func) { - inputs[i] = getInputTensor(node->input(i)); + // 
Using BuilderId is a duplicate of luci::CircleOpcode, + size_t(id) is equal to size_t(corresponding operation opcode). + assert(size_t(id) < _operator_builders.size()); + _operator_builders[size_t(id)] = func; } - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - RuntimeGraph *then_graph = getRuntimeGraph(node->then_graph()); - RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph()); - - return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph, - else_graph); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *) -{ - throw std::runtime_error("Input node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Normalize *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); +}; - L2NormParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::L2Normalize>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Pool2D *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); - - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::L2Pool2D>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLeakyRelu *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - LeakyReluParams params{}; - params.alpha = node->alpha(); - - return std::make_unique<kernels::LeakyRelu>(input, output, params); -} - 
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLocalResponseNormalization *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - LocalResponseNormalizationParams params{}; - params.radius = node->radius(); - params.bias = node->bias(); - params.alpha = node->alpha(); - params.beta = node->beta(); - - return std::make_unique<kernels::LocalResponseNormalization>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLogistic *node) +KernelBuilder::KernelBuilder( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) { - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Logistic>(input, output); + _builder_registry = std::make_unique<KernelBuilderRegistry>(); } -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMaxPool2D *node) +KernelBuilder::~KernelBuilder() { - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); - - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::MaxPool2D>(input, output, params); + // Need to define in this CPP to hide KernelBuilderRegistry internals. 
+ // This destructor deletes _builder_registry } -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMean *node) +std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node) { - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *axes = getInputTensor(node->reduction_indices()); - Tensor *output = getOutputTensor(node); - - ReducerParams params{}; - params.keep_dims = node->keep_dims(); - - return std::make_unique<kernels::Mean>(input, axes, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMul *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - MulParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Mul>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleOutput *) -{ - throw std::runtime_error("Output node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePad *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *paddings = getInputTensor(node->paddings()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Pad>(input, paddings, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReshape *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->tensor()); - const Tensor *shape = getInputTensor(node->shape()); - Tensor *output = getOutputTensor(node); - - // NOTE 'newShape' attribute is ignored. 
- return std::make_unique<kernels::Reshape>(input, shape, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->tensor()); - const Tensor *axes = getInputTensor(node->axis()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Reverse>(input, axes, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Rsqrt>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *begin = getInputTensor(node->begin()); - const Tensor *size = getInputTensor(node->size()); - - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Slice>(input, begin, size, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSoftmax *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->logits()); - Tensor *output = getOutputTensor(node); - - SoftmaxParams params{}; - params.beta = node->beta(); - - return std::make_unique<kernels::Softmax>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToDepth *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->input()); - - Tensor *output = getOutputTensor(node); - - SpaceToDepthParams params{}; - params.block_size = node->block_size(); - - return std::make_unique<kernels::SpaceToDepth>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node); - assert(node->arity() == 2); - 
assert(output_nodes.size() == static_cast<size_t>(node->num_split())); - - const Tensor *axis = getInputTensor(node->split_dim()); - const Tensor *input = getInputTensor(node->input()); - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - // NOTE 'num_splits' attribute is ignored. - return std::make_unique<kernels::Split>(axis, input, std::move(outputs)); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Sqrt>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - SqueezeParams params{}; - params.squeeze_dims = node->squeeze_dims(); - - return std::make_unique<kernels::Squeeze>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *node) -{ - assert(node->arity() == 4); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *begin = getInputTensor(node->begin()); - const Tensor *end = getInputTensor(node->end()); - const Tensor *strides = getInputTensor(node->strides()); - - Tensor *output = getOutputTensor(node); - - StridedSliceParams params{}; - params.begin_mask = node->begin_mask(); - params.ellipsis_mask = node->ellipsis_mask(); - params.end_mask = node->end_mask(); - params.new_axis_mask = node->new_axis_mask(); - params.shrink_axis_mask = node->shrink_axis_mask(); - - return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return 
std::make_unique<kernels::Tanh>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->a()); - const Tensor *perm = getInputTensor(node->perm()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Transpose>(input, perm, output); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node) -{ - assert(node->arity() == 4); - - const Tensor *input_sizes = getInputTensor(node->inputSizes()); - const Tensor *filter = getInputTensor(node->filter()); - const Tensor *out_backprop = getInputTensor(node->outBackprop()); - const Tensor *bias = getOptionalInputTensor(node->bias()); - - Tensor *output = getOutputTensor(node); - - TransposeConvParams params{}; - params.padding = node->padding(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - - return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, - params); -} - -std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleUnpack *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); - assert(node->arity() == 1); - assert(output_nodes.size() == static_cast<size_t>(node->num())); - - const Tensor *input = getInputTensor(node->value()); - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - UnpackParams params{}; - params.axis = node->axis(); + auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode()); + if (specific_builder != nullptr) + return specific_builder(node, *this); - // NOTE 'num' attribute is ignored. 
- return std::make_unique<kernels::Unpack>(input, std::move(outputs), params); + std::string msg = "Unsupported operator: "; + msg += toString(node->opcode()) + " in " + std::string(node->name()); + throw std::invalid_argument(msg.c_str()); } } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h index 31cb9d8fc..b1f383394 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.h +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h @@ -17,79 +17,34 @@ #ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H #define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H +#include "loader/KernelBuilderHelper.h" + #include "core/Kernel.h" #include "core/RuntimeGraph.h" #include <luci/IR/CircleNodeVisitor.h> #include <memory> -#include <vector> #include <unordered_map> namespace luci_interpreter { -class KernelBuilder : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>> +class KernelBuilderRegistry; + +class KernelBuilder : public KernelBuilderHelper { public: KernelBuilder( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) - { - } - - std::unique_ptr<Kernel> visit(const luci::CircleAdd *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleArgMax *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleAveragePool2D *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleDepthToSpace *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleDepthwiseConv2D *node) override; - std::unique_ptr<Kernel> visit(const 
luci::CircleElu *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleFullyConnected *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleIf *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleL2Normalize *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleL2Pool2D *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleLeakyRelu *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleLocalResponseNormalization *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleLogistic *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleInput *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleMaxPool2D *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleMean *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleMul *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override; - std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override; - std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) 
override; - std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override; - -private: - const Tensor *getInputTensor(const loco::Node *node) const; - - const Tensor *getOptionalInputTensor(const loco::Node *node) const; - - Tensor *getOutputTensor(const loco::Node *node) const; + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); - std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const; + ~KernelBuilder(); - RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const; + std::unique_ptr<Kernel> build(const luci::CircleNode *node); private: - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; - const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; + std::unique_ptr<KernelBuilderRegistry> _builder_registry; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp index 4e2bc3d0b..10a01f418 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -16,35 +16,67 @@ #include "loader/GraphLoader.h" #include "loader/KernelBuilder.h" +#include "luci_interpreter/SimpleMemoryManager.h" #include <kernels/Add.h> #include <kernels/ArgMax.h> #include <kernels/AveragePool2D.h> +#include <kernels/BatchMatMul.h> +#include <kernels/Cast.h> #include <kernels/Concatenation.h> #include <kernels/Conv2D.h> #include <kernels/DepthToSpace.h> #include <kernels/DepthwiseConv2D.h> +#include <kernels/Div.h> #include <kernels/Elu.h> +#include <kernels/Exp.h> +#include <kernels/Floor.h> +#include <kernels/FloorDiv.h> +#include <kernels/Equal.h> #include <kernels/FullyConnected.h> +#include <kernels/Greater.h> +#include <kernels/GreaterEqual.h> +#include <kernels/InstanceNorm.h> #include 
<kernels/L2Normalize.h> #include <kernels/L2Pool2D.h> #include <kernels/LeakyRelu.h> +#include <kernels/Less.h> +#include <kernels/LessEqual.h> #include <kernels/LocalResponseNormalization.h> +#include <kernels/LogicalAnd.h> +#include <kernels/LogicalNot.h> +#include <kernels/LogicalOr.h> #include <kernels/Logistic.h> +#include <kernels/LogSoftmax.h> +#include <kernels/Maximum.h> #include <kernels/MaxPool2D.h> #include <kernels/Mean.h> +#include <kernels/Minimum.h> #include <kernels/Mul.h> +#include <kernels/Neg.h> +#include <kernels/NotEqual.h> +#include <kernels/OneHot.h> #include <kernels/Pad.h> +#include <kernels/PadV2.h> +#include <kernels/Pow.h> +#include <kernels/PRelu.h> +#include <kernels/Relu.h> +#include <kernels/Relu6.h> #include <kernels/Reshape.h> -#include <kernels/Reverse.h> +#include <kernels/ResizeBilinear.h> +#include <kernels/ResizeNearestNeighbor.h> +#include <kernels/ReverseV2.h> #include <kernels/Rsqrt.h> #include <kernels/Slice.h> #include <kernels/Softmax.h> #include <kernels/SpaceToDepth.h> #include <kernels/Split.h> +#include <kernels/SplitV.h> #include <kernels/Sqrt.h> +#include <kernels/SquaredDifference.h> #include <kernels/Squeeze.h> #include <kernels/StridedSlice.h> +#include <kernels/Sub.h> #include <kernels/Tanh.h> #include <kernels/Transpose.h> #include <kernels/TransposeConv.h> @@ -63,6 +95,9 @@ class KernelBuilderTest : public Test { protected: luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); } + void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; template <typename NodeT, typename... Args> NodeT *createNode(Args &&... 
args) { @@ -86,15 +121,16 @@ protected: { std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph; - RuntimeGraph runtime_graph(nullptr); + RuntimeGraph runtime_graph(nullptr, _memory_manager.get()); + graph_to_runtime_graph[&_graph] = &runtime_graph; RuntimeToIR runtime_to_ir; GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph, - _node_to_tensor); + _node_to_tensor, _memory_manager.get()); graph_loader.loadTensors(); KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor); - auto kernel = op->accept(&kernel_builder); + auto kernel = kernel_builder.build(op); return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release())); } @@ -175,6 +211,41 @@ TEST_F(KernelBuilderTest, AveragePool2D) EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } +TEST_F(KernelBuilderTest, BatchMatMul) +{ + auto *lhs = createInputNode(); + auto *rhs = createInputNode(); + + auto *op = createNode<luci::CircleBatchMatMul>(); + op->x(lhs); + op->y(rhs); + op->adj_x(false); + op->adj_y(false); + + auto kernel = buildKernel<kernels::BatchMatMul>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), lhs); + checkTensor(kernel->y(), rhs); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x())); + EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y())); +} + +TEST_F(KernelBuilderTest, Cast) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleCast>(); + op->x(input); + + auto kernel = buildKernel<kernels::Cast>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Concatenation) { auto *input1 = createInputNode(); @@ -192,6 +263,7 @@ TEST_F(KernelBuilderTest, Concatenation) checkTensor(kernel->input(1), input2); checkTensor(kernel->output(), op); EXPECT_THAT(kernel->params().axis, Eq(op->axis())); + 
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } TEST_F(KernelBuilderTest, Conv2D) @@ -279,6 +351,26 @@ TEST_F(KernelBuilderTest, DepthwiseConv2D) EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } +TEST_F(KernelBuilderTest, Div) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleDiv>(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Div>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + TEST_F(KernelBuilderTest, Elu) { auto *input = createInputNode(); @@ -293,6 +385,68 @@ TEST_F(KernelBuilderTest, Elu) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, Exp) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleExp>(); + op->x(input); + + auto kernel = buildKernel<kernels::Exp>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Floor) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleFloor>(); + op->x(input); + + auto kernel = buildKernel<kernels::Floor>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, FloorDiv) +{ + auto *x = createInputNode(); + auto *y = createInputNode(); + + auto *op = createNode<luci::CircleFloorDiv>(); + op->x(x); + op->y(y); + + auto kernel = buildKernel<kernels::FloorDiv>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x); + checkTensor(kernel->y(), y); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Equal) +{ + auto *x_input = createInputNode(); + auto 
*y_input = createInputNode(); + + auto *op = createNode<luci::CircleEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Equal>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, FullyConnected) { auto *input = createInputNode(); @@ -316,6 +470,65 @@ TEST_F(KernelBuilderTest, FullyConnected) EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } +TEST_F(KernelBuilderTest, Greater) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleGreater>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Greater>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, GreaterEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleGreaterEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::GreaterEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, InstanceNorm) +{ + auto *input = createInputNode(); + auto *gamma = createInputNode(); + auto *beta = createInputNode(); + + auto *op = createNode<luci::CircleInstanceNorm>(); + op->input(input); + op->gamma(gamma); + op->beta(beta); + + op->epsilon(1e-05); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::InstanceNorm>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->gamma(), gamma); + checkTensor(kernel->beta(), beta); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().epsilon, 
Eq(op->epsilon())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + TEST_F(KernelBuilderTest, L2Normalize) { auto *input = createInputNode(); @@ -377,6 +590,40 @@ TEST_F(KernelBuilderTest, LeakyRelu) EXPECT_THAT(kernel->params().alpha, Eq(op->alpha())); } +TEST_F(KernelBuilderTest, Less) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleLess>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Less>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LessEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleLessEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::LessEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, LocalResponseNormalization) { auto *input = createInputNode(); @@ -400,6 +647,54 @@ TEST_F(KernelBuilderTest, LocalResponseNormalization) EXPECT_THAT(kernel->params().beta, Eq(op->beta())); } +TEST_F(KernelBuilderTest, LogicalAnd) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleLogicalAnd>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::LogicalAnd>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalNot) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLogicalNot>(); + op->x(input); + + auto kernel = buildKernel<kernels::LogicalNot>(op); + ASSERT_THAT(kernel, NotNull()); + + 
checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalOr) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleLogicalOr>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::LogicalOr>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Logistic) { auto *input = createInputNode(); @@ -414,6 +709,37 @@ TEST_F(KernelBuilderTest, Logistic) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, LogSoftmax) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLogSoftmax>(); + op->logits(input); + + auto kernel = buildKernel<kernels::LogSoftmax>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Maximum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleMaximum>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Maximum>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, MaxPool2D) { auto *input = createInputNode(); @@ -461,6 +787,23 @@ TEST_F(KernelBuilderTest, Mean) EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims())); } +TEST_F(KernelBuilderTest, Minimum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleMinimum>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Minimum>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + 
TEST_F(KernelBuilderTest, Mul) { auto *input1 = createInputNode(); @@ -481,6 +824,62 @@ TEST_F(KernelBuilderTest, Mul) EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } +TEST_F(KernelBuilderTest, Neg) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleNeg>(); + op->x(input); + + auto kernel = buildKernel<kernels::Neg>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, NotEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleNotEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::NotEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, OneHot) +{ + auto *indices = createInputNode(); + auto *depth = createInputNode(); + auto *on_value = createInputNode(); + auto *off_value = createInputNode(); + auto axis = 1; + + auto *op = createNode<luci::CircleOneHot>(); + op->indices(indices); + op->depth(depth); + op->on_value(on_value); + op->off_value(off_value); + op->axis(axis); + + auto kernel = buildKernel<kernels::OneHot>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->indices(), indices); + checkTensor(kernel->depth(), depth); + checkTensor(kernel->on_value(), on_value); + checkTensor(kernel->off_value(), off_value); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + TEST_F(KernelBuilderTest, Pad) { auto *input = createInputNode(); @@ -498,6 +897,88 @@ TEST_F(KernelBuilderTest, Pad) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, PadV2) +{ + auto *input = createInputNode(); + auto *paddings = createInputNode(); + auto *constant_values = createInputNode(); + + auto *op = createNode<luci::CirclePadV2>(); + op->input(input); + op->paddings(paddings); + 
op->constant_values(constant_values); + + auto kernel = buildKernel<kernels::PadV2>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->paddings(), paddings); + checkTensor(kernel->constant_values(), constant_values); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Pow) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CirclePow>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Pow>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, PRelu) +{ + auto *input = createInputNode(); + auto *alpha = createInputNode(); + + auto *op = createNode<luci::CirclePRelu>(); + op->input(input); + op->alpha(alpha); + + auto kernel = buildKernel<kernels::PRelu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->alpha(), alpha); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRelu>(); + op->features(input); + + auto kernel = buildKernel<kernels::Relu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu6) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRelu6>(); + op->features(input); + + auto kernel = buildKernel<kernels::Relu6>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Reshape) { auto *input = createInputNode(); @@ -515,6 +996,48 @@ TEST_F(KernelBuilderTest, Reshape) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, ResizeBilinear) +{ + auto *input = createInputNode(); + auto *size = 
createInputNode(); + + auto *op = createNode<luci::CircleResizeBilinear>(); + op->input(input); + op->size(size); + op->align_corners(true); + op->half_pixel_centers(true); + + auto kernel = buildKernel<kernels::ResizeBilinear>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers())); +} + +TEST_F(KernelBuilderTest, ResizeNearestNeighbor) +{ + auto *input = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode<luci::CircleResizeNearestNeighbor>(); + op->input(input); + op->size(size); + op->align_corners(true); + + auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + // TODO currently half_pixel_centers are not implemented on CircleResizeNearestNeighbor + // after adding, need to be updated. 
+} + TEST_F(KernelBuilderTest, ReverseV2) { auto *input = createInputNode(); @@ -524,7 +1047,7 @@ TEST_F(KernelBuilderTest, ReverseV2) op->tensor(input); op->axis(axes); - auto kernel = buildKernel<kernels::Reverse>(op); + auto kernel = buildKernel<kernels::ReverseV2>(op); ASSERT_THAT(kernel, NotNull()); checkTensor(kernel->input(), input); @@ -622,6 +1145,31 @@ TEST_F(KernelBuilderTest, Split) checkTensor(kernel->output(1), output2); } +TEST_F(KernelBuilderTest, SplitV) +{ + auto *input = createInputNode(); + auto *size_splits = createInputNode(); + auto *axis = createInputNode(); + auto *op = createNode<luci::CircleSplitV>(); + auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0); + auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1); + + op->input(input); + op->size_splits(size_splits); + op->split_dim(axis); + + op->num_split(2); + + auto kernel = buildKernel<kernels::SplitV>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size_splits(), size_splits); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(0), output0); + checkTensor(kernel->output(1), output1); +} + TEST_F(KernelBuilderTest, Sqrt) { auto *input = createInputNode(); @@ -636,6 +1184,23 @@ TEST_F(KernelBuilderTest, Sqrt) checkTensor(kernel->output(), op); } +TEST_F(KernelBuilderTest, SquaredDifference) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleSquaredDifference>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::SquaredDifference>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + TEST_F(KernelBuilderTest, Squeeze) { auto *input = createInputNode(); @@ -687,6 +1252,26 @@ TEST_F(KernelBuilderTest, StridedSlice) EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask())); } +TEST_F(KernelBuilderTest, 
Sub) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleSub>(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Sub>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + TEST_F(KernelBuilderTest, Tanh) { auto *input = createInputNode(); @@ -734,6 +1319,7 @@ TEST_F(KernelBuilderTest, TransposeConv) op->padding(luci::Padding::SAME); op->stride()->h(11); op->stride()->w(13); + op->fusedActivationFunction(luci::FusedActFunc::NONE); auto kernel = buildKernel<kernels::TransposeConv>(op); ASSERT_THAT(kernel, NotNull()); @@ -746,6 +1332,7 @@ TEST_F(KernelBuilderTest, TransposeConv) EXPECT_THAT(kernel->params().padding, Eq(op->padding())); EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); } TEST_F(KernelBuilderTest, Unpack) diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp new file mode 100644 index 000000000..23c96a6db --- /dev/null +++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loader/KernelBuilderHelper.h" + +#include <luci/IR/Nodes/CircleOutput.h> + +namespace luci_interpreter +{ + +const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const +{ + const Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const +{ + if (dynamic_cast<const luci::CircleOutputExclude *>(node)) + { + return nullptr; + } + return getInputTensor(node); +} + +Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const +{ + Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +std::vector<Tensor *> +KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const +{ + std::vector<Tensor *> tensors; + tensors.reserve(nodes.size()); + for (const loco::Node *node : nodes) + tensors.push_back(getOutputTensor(node)); + return tensors; +} + +RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const +{ + RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); + assert(runtime_graph != nullptr); + return runtime_graph; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h new file mode 100644 index 000000000..d6fb253b1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., 
Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H +#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +#include <loco/IR/Graph.h> +#include <loco/IR/Node.h> + +#include <vector> +#include <unordered_map> + +namespace luci_interpreter +{ + +class KernelBuilderHelper +{ +public: + KernelBuilderHelper( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) + { + } + +public: + const Tensor *getInputTensor(const loco::Node *node) const; + const Tensor *getOptionalInputTensor(const loco::Node *node) const; + + Tensor *getOutputTensor(const loco::Node *node) const; + std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const; + + RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const; + +public: + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const + { + return _graph_to_runtime_graph; + } + + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const + { + return _node_to_tensor; + } + +private: + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; + const std::unordered_map<const 
loco::Node *, Tensor *> &_node_to_tensor; +}; + +template <typename CircleNodeOut> +std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node) +{ + std::vector<const CircleNodeOut *> output_nodes; + for (const loco::Node *loco_node : loco::succs(node)) + { + output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node)); + } + std::sort(output_nodes.begin(), output_nodes.end(), + [](const CircleNodeOut *node1, const CircleNodeOut *node2) { + return node1->index() < node2->index(); + }); + return {output_nodes.cbegin(), output_nodes.cend()}; +} + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp index b9a2ae0a9..2f278b087 100644 --- a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp +++ b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp @@ -23,9 +23,10 @@ namespace luci_interpreter ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, RuntimeToIR &runtime_to_ir, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir), - _node_to_tensor(node_to_tensor) + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager) + : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir), + _node_to_tensor(node_to_tensor), _memory_manager(memory_manager) { } @@ -35,14 +36,14 @@ void ModuleLoader::load() // process for control flow nodes. 
for (size_t i = 0; i < _module->size(); ++i) { - _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph()); + _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager)); } for (size_t i = 0; i < _module->size(); ++i) { const loco::Graph *graph = _module->graph(i); RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph, - _node_to_tensor); + _node_to_tensor, _memory_manager); loader.loadTensors(); loader.initInputOutputTensors(); loader.loadOperators(); diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-interpreter/src/loader/ModuleLoader.h index 1af0ed747..11326a2ee 100644 --- a/compiler/luci-interpreter/src/loader/ModuleLoader.h +++ b/compiler/luci-interpreter/src/loader/ModuleLoader.h @@ -19,6 +19,7 @@ #include "core/RuntimeModule.h" #include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" #include <luci/IR/Module.h> @@ -32,11 +33,13 @@ class ModuleLoader public: ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, RuntimeToIR &runtime_to_ir, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); void load(); private: + IMemoryManager *_memory_manager; const luci::Module *_module; RuntimeModule *_runtime_module; RuntimeToIR &_runtime_to_ir; diff --git a/compiler/luci-interpreter/src/loader/nodes/Abs.cpp b/compiler/luci-interpreter/src/loader/nodes/Abs.cpp new file mode 100644 index 000000000..394711145 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Abs.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Abs.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAbs(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleAbs *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Abs>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp new file mode 100644 index 000000000..501e84752 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Add.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + AddParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Add>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp new file mode 100644 index 000000000..f3ca55744 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ArgMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node); + assert(node->arity() == 2); + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->dimension()); + Tensor *output = helper.getOutputTensor(node); + + ArgMaxParams params{}; + params.output_type = node->output_type(); + + return std::make_unique<kernels::ArgMax>(input, axis, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp new file mode 100644 index 000000000..a8135706f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/AveragePool2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleAveragePool2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp new file mode 100644 index 000000000..9da2f6d93 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchMatMul.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node); + assert(node->arity() == 2); + + const Tensor *lhs = helper.getInputTensor(node->x()); + const Tensor *rhs = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + auto lhs_scratchpad = + std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, ""); + lhs_scratchpad->set_observable(false); + lhs_scratchpad->set_data_buffer(nullptr); + auto rhs_scratchpad = + std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, ""); + rhs_scratchpad->set_observable(false); + rhs_scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current BatchMatMul temporary was found. + if (execution_plan.offsets().size() > 1) + { + assert(execution_plan.offsets().size() == 3); + + // If this is true, then we keep this offset in scratchpad. 
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1)); + rhs_scratchpad->set_offset(execution_plan.offsets().at(2)); + } + } + Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad)); + Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad)); + + BatchMatMulParams params; + params.adj_x = node->adj_x(); + params.adj_y = node->adj_y(); + + return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp new file mode 100644 index 000000000..ac6ebb30f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchToSpaceND.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *crops = helper.getInputTensor(node->crops()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-interpreter/src/loader/nodes/Builders.h new file mode 100644 index 000000000..eab284008 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Builders.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H +#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H + +#include "loader/KernelBuilderHelper.h" + +#include "luci/IR/CircleNodes.h" + +namespace luci_interpreter +{ + +#define REGISTER_KERNEL(name) \ + std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \ + KernelBuilderHelper &helper); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp new file mode 100644 index 000000000..a16354c96 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Cast.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node); + + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Cast>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp new file mode 100644 index 000000000..ba2564ea2 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Concatenation.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node); + std::vector<const Tensor *> inputs(node->numValues()); + for (uint32_t i = 0; i < node->numValues(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + ConcatenationParams params{}; + params.axis = node->axis(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp new file mode 100644 index 000000000..218165e20 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Conv2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + Conv2DParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp new file mode 100644 index 000000000..174946367 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthToSpace.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + DepthToSpaceParams params{}; + params.block_size = node->block_size(); + + return std::make_unique<kernels::DepthToSpace>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp new file mode 100644 index 000000000..8af1e3b58 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthwiseConv2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + DepthwiseConv2DParams params{}; + params.padding = node->padding(); + params.depth_multiplier = node->depthMultiplier(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleDepthwiseConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp new file mode 100644 index 000000000..787322e9b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Dequantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Dequantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp new file mode 100644 index 000000000..0611dfdab --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Div.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node); + assert(node->arity() == 2); + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + DivParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Div>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp new file mode 100644 index 000000000..a79985e3b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Elu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Elu>(input, output); +} +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp new file mode 100644 index 000000000..59692883f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Equal.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) + +{ + const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Equal>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp new file mode 100644 index 000000000..30d11cb89 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Exp.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Exp>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp new file mode 100644 index 000000000..9840c34e5 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ExpandDims.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::ExpandDims>(input, axis, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp new file mode 100644 index 000000000..3aefdf1c5 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Fill.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node); + assert(node->arity() == 2); + + const auto dims = helper.getInputTensor(node->dims()); + const auto value = helper.getInputTensor(node->value()); + auto output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Fill>(dims, value, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp new file mode 100644 index 000000000..e0a223116 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Floor.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Floor>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp new file mode 100644 index 000000000..a45d89e38 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FloorDiv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::FloorDiv>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp new file mode 100644 index 000000000..a4852f13e --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/FloorMod.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FloorMod.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloorMod(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFloorMod *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::FloorMod>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp new file mode 100644 index 000000000..b7b742b8a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FullyConnected.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *weights = helper.getInputTensor(node->weights()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + FullyConnectedParams params{}; + params.activation = node->fusedActivationFunction(); + params.keep_num_dims = node->keep_num_dims(); + + return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp new file mode 100644 index 000000000..2ee2906e0 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Gather.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node); + assert(node->arity() == 2); + + const Tensor *params = helper.getInputTensor(node->params()); + const Tensor *indices = helper.getInputTensor(node->indices()); + Tensor *output = helper.getOutputTensor(node); + + GatherParams gparams{}; + gparams.axis = node->axis(); + // TODO support batch_dims + gparams.batch_dims = 0; + + return std::make_unique<kernels::Gather>(params, indices, output, gparams); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp b/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp new file mode 100644 index 000000000..fc77a5817 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Gelu.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Gelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGelu *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + GeluParams params{}; + params.approximate = node->approximate(); + + return std::make_unique<kernels::Gelu>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp new file mode 100644 index 000000000..80aa63cf0 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Greater.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Greater>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp new file mode 100644 index 000000000..272f2843b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/GreaterEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::GreaterEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp b/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp new file mode 100644 index 000000000..2e62f2402 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/HardSwish.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/HardSwish.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleHardSwish(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleHardSwish *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::HardSwish>(input, output); +} +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp new file mode 100644 index 000000000..3ac7d4941 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/If.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); + assert(node->arity() == 1 + node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + const Tensor *cond = helper.getInputTensor(node->cond()); + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph()); + RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph()); + + return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph, + else_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp new file mode 100644 index 000000000..06031e5bc --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/InstanceNorm.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *gamma = helper.getInputTensor(node->gamma()); + const Tensor *beta = helper.getInputTensor(node->beta()); + + Tensor *output = helper.getOutputTensor(node); + + InstanceNormParams params{}; + params.epsilon = node->epsilon(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp new file mode 100644 index 000000000..6e22e6d4e --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Normalize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + L2NormParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Normalize>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp new file mode 100644 index 000000000..95b55896f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Pool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Pool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp new file mode 100644 index 000000000..bbf5067b1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LeakyRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + LeakyReluParams params{}; + params.alpha = node->alpha(); + + return std::make_unique<kernels::LeakyRelu>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp new file mode 100644 index 000000000..ae914ecc9 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Less.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Less>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp new file mode 100644 index 000000000..f1b424b55 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LessEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LessEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp new file mode 100644 index 000000000..962ca2d7c --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LocalResponseNormalization.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + LocalResponseNormalizationParams params{}; + params.radius = node->radius(); + params.bias = node->bias(); + params.alpha = node->alpha(); + params.beta = node->beta(); + + return std::make_unique<kernels::LocalResponseNormalization>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Log.cpp b/compiler/luci-interpreter/src/loader/nodes/Log.cpp new file mode 100644 index 000000000..048e3101e --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Log.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Log.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLog(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLog *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Log>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp new file mode 100644 index 000000000..432204115 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogSoftmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogSoftmax>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp new file mode 100644 index 000000000..bf3cb671a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalAnd.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalAnd>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp new file mode 100644 index 000000000..fefcd9a06 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalNot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalNot>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp new file mode 100644 index 000000000..a416cb401 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalOr.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalOr>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp new file mode 100644 index 000000000..4a69deef1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Logistic.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Logistic>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp new file mode 100644 index 000000000..f66a206ca --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/MaxPool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::MaxPool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp new file mode 100644 index 000000000..d0bff776a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Maximum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Maximum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp new file mode 100644 index 000000000..0dec63e79 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Mean.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + auto temp_sum_unique = + std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); + temp_sum_unique->set_observable(false); + temp_sum_unique->set_data_buffer(nullptr); + Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp new file mode 100644 index 000000000..1a49c1090 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Minimum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Minimum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp new file mode 100644 index 000000000..b221b4574 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/MirrorPad.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + MirrorPadParams params{}; + params.mode = node->mode(); + + return std::make_unique<kernels::MirrorPad>(input, paddings, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp new file mode 100644 index 000000000..f9984853a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Mul.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + MulParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Mul>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp new file mode 100644 index 000000000..9a9ecf991 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Neg.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Neg>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp new file mode 100644 index 000000000..3916a5854 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/NotEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::NotEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp new file mode 100644 index 000000000..a40160945 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/OneHot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node); + assert(node->arity() == 4); + + const Tensor *indices = helper.getInputTensor(node->indices()); + const Tensor *depth = helper.getInputTensor(node->depth()); + const Tensor *on_value = helper.getInputTensor(node->on_value()); + const Tensor *off_value = helper.getInputTensor(node->off_value()); + Tensor *output = helper.getOutputTensor(node); + + OneHotParams params{}; + params.axis = node->axis(); + + return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp new file mode 100644 index 000000000..f3d700c95 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *alpha = helper.getInputTensor(node->alpha()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::PRelu>(input, alpha, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp new file mode 100644 index 000000000..efc5850e0 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pack.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node); + assert(node->arity() == node->values_count()); + + std::vector<const Tensor *> inputs(node->values_count()); + for (uint32_t i = 0; i < node->values_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + PackParams params{}; + params.axis = node->axis(); + params.values_count = node->values_count(); + + return std::make_unique<kernels::Pack>(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp new file mode 100644 index 000000000..67ce997a7 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pad.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Pad>(input, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp new file mode 100644 index 000000000..e378a972a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PadV2.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + const Tensor *constant_values = helper.getInputTensor(node->constant_values()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp new file mode 100644 index 000000000..d32fc3dbb --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pow.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Pow>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp new file mode 100644 index 000000000..cb36fb6da --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Quantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Quantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp new file mode 100644 index 000000000..1a8522dd6 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ReduceMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp new file mode 100644 index 000000000..1610e20a9 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/ReduceProd.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReduceProd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReduceProd *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::ReduceProd>(input, axes, output, temp_index, resolved_axes, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp new file mode 100644 index 000000000..1d64c1c4e --- /dev/null +++ 
b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Relu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Relu>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp new file mode 100644 index 000000000..e50cd2545 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Relu6.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Relu6>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp new file mode 100644 index 000000000..76ddd88a3 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Reshape.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *shape = helper.getInputTensor(node->shape()); + Tensor *output = helper.getOutputTensor(node); + + // NOTE 'newShape' attribute is ignored. + return std::make_unique<kernels::Reshape>(input, shape, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp new file mode 100644 index 000000000..dc2b88ad3 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeBilinear.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeBilinearParams params{}; + params.align_corners = node->align_corners(); + params.half_pixel_centers = node->half_pixel_centers(); + + return std::make_unique<kernels::ResizeBilinear>(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp new file mode 100644 index 000000000..c7058ae78 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeNearestNeighbor.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeNearestNeighborParams params{}; + params.align_corners = node->align_corners(); + // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated + // Current CircleResizeNearestNeighbor don't have half_pixel_centers. + // default value on current is false. + // it need to be updated when CircleResizeNearestNeighbor updated. + params.half_pixel_centers = false; + + return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp new file mode 100644 index 000000000..c1a7f5350 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ReverseV2.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *axes = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::ReverseV2>(input, axes, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp new file mode 100644 index 000000000..0714a5dba --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Rsqrt.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Rsqrt>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp new file mode 100644 index 000000000..d172ef438 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SVDF.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node); + assert(node->arity() == 5); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *feature = helper.getInputTensor(node->weight_feature()); + const Tensor *time = helper.getInputTensor(node->weight_time()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state()); + Tensor *output = helper.getOutputTensor(node); + + auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(), + Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + DataType data_type = input->element_type() == DataType::S8 ? 
DataType::S32 : DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + if (data_type == DataType::FLOAT32 && + (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8)) + { + data_type = feature->element_type(); + } + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + data_type = DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + SVDFParams params{}; + 
params.activation = node->fusedActivationFunction(); + params.svdf_rank = node->svdf_rank(); + params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs(); + + return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output, + tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Select.cpp b/compiler/luci-interpreter/src/loader/nodes/Select.cpp new file mode 100644 index 000000000..a0f18047b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Select.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Select.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSelect(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSelect *>(circle_node); + assert(node->arity() == 3); + + const Tensor *c = helper.getInputTensor(node->condition()); + const Tensor *t = helper.getInputTensor(node->t()); + const Tensor *e = helper.getInputTensor(node->e()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Select>(c, t, e, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp new file mode 100644 index 000000000..d1edbc794 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Shape.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node); + assert(node->arity() == 1); + + const auto input = helper.getInputTensor(node->input()); + auto output = helper.getOutputTensor(node); + + ShapeParams shape_params{}; + shape_params.out_type = node->out_type(); + + return std::make_unique<kernels::ShapeKernel>(input, output, shape_params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp new file mode 100644 index 000000000..60ac6417c --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Slice.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *size = helper.getInputTensor(node->size()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Slice>(input, begin, size, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp new file mode 100644 index 000000000..f41f63f6f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Softmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + SoftmaxParams params{}; + params.beta = node->beta(); + + return std::make_unique<kernels::Softmax>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp new file mode 100644 index 000000000..b6e6cf516 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToBatchND.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp new file mode 100644 index 000000000..63fdb95ec --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToDepth.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + + Tensor *output = helper.getOutputTensor(node); + + SpaceToDepthParams params{}; + params.block_size = node->block_size(); + + return std::make_unique<kernels::SpaceToDepth>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp new file mode 100644 index 000000000..3f6d4a7df --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Split.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node); + assert(node->arity() == 2); + assert(output_nodes.size() == static_cast<size_t>(node->num_split())); + + const Tensor *axis = helper.getInputTensor(node->split_dim()); + const Tensor *input = helper.getInputTensor(node->input()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + // NOTE 'num_splits' attribute is ignored. + return std::make_unique<kernels::Split>(axis, input, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp new file mode 100644 index 000000000..0788822ca --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SplitV.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node); + assert(node->arity() == 3); + assert(output_nodes.size() == static_cast<size_t>(node->num_split())); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *sizes_data = helper.getInputTensor(node->size_splits()); + const Tensor *axis = helper.getInputTensor(node->split_dim()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + // NOTE 'num_splits' attribute is ignored. + return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp new file mode 100644 index 000000000..b9843fe0b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sqrt.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Sqrt>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp new file mode 100644 index 000000000..0ad7c1772 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Square.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Square>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp new file mode 100644 index 000000000..e4c6fd851 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SquaredDifference.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::SquaredDifference>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp new file mode 100644 index 000000000..6885f8077 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Squeeze.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + SqueezeParams params{}; + params.squeeze_dims = node->squeeze_dims(); + + return std::make_unique<kernels::Squeeze>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp new file mode 100644 index 000000000..359b4e3e9 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/StridedSlice.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node); + assert(node->arity() == 4); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *end = helper.getInputTensor(node->end()); + const Tensor *strides = helper.getInputTensor(node->strides()); + + Tensor *output = helper.getOutputTensor(node); + + StridedSliceParams params{}; + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp new file mode 100644 index 000000000..a6252cb53 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sub.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + SubParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Sub>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Sum.cpp b/compiler/luci-interpreter/src/loader/nodes/Sum.cpp new file mode 100644 index 000000000..6dfe362c9 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Sum.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSum *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::Sum>(input, axes, output, temp_index, resolved_axes, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp new file mode 100644 index 000000000..a58ef60a8 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Tanh.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Tanh>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp new file mode 100644 index 000000000..ea17d8311 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Transpose.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->a()); + const Tensor *perm = helper.getInputTensor(node->perm()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Transpose>(input, perm, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp new file mode 100644 index 000000000..72d1aecf7 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/TransposeConv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node); + assert(node->arity() == 4); + + const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *out_backprop = helper.getInputTensor(node->outBackprop()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + + Tensor *output = helper.getOutputTensor(node); + + DataType scratch_data_type = + helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + + auto scratch_tensor = + std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, ""); + scratch_tensor->set_observable(false); + scratch_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor)); + + TransposeConvParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + // TODO support activation + if (params.activation != luci::FusedActFunc::NONE) + { + throw std::runtime_error("Unsupported activation of TransposeConv"); + } + + return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, + tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp new file mode 100644 index 000000000..f4cf0b869 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2022 Samsung 
Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/UnidirectionalSequenceLSTM.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleUnidirectionalSequenceLSTM(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(circle_node); + assert(node->arity() == 24); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *input_to_input_weights = + helper.getOptionalInputTensor(node->input_to_input_weights()); + const Tensor *input_to_cell_weights = helper.getInputTensor(node->input_to_cell_weights()); + const Tensor *input_to_forget_weights = helper.getInputTensor(node->input_to_forget_weights()); + const Tensor *input_to_output_weights = helper.getInputTensor(node->input_to_output_weights()); + const Tensor *recurrent_to_input_weights = + helper.getOptionalInputTensor(node->recurrent_to_input_weights()); + const Tensor *recurrent_to_cell_weights = + helper.getInputTensor(node->recurrent_to_cell_weights()); + const Tensor *recurrent_to_forget_weights = + helper.getInputTensor(node->recurrent_to_forget_weights()); + const Tensor *recurrent_to_output_weights = + helper.getInputTensor(node->recurrent_to_output_weights()); + const Tensor *cell_to_input_weights = + 
helper.getOptionalInputTensor(node->cell_to_input_weights()); + const Tensor *cell_to_forget_weights = + helper.getOptionalInputTensor(node->cell_to_forget_weights()); + const Tensor *cell_to_output_weights = + helper.getOptionalInputTensor(node->cell_to_output_weights()); + const Tensor *input_gate_bias = helper.getOptionalInputTensor(node->input_gate_bias()); + const Tensor *forget_gate_bias = helper.getInputTensor(node->forget_gate_bias()); + const Tensor *cell_gate_bias = helper.getInputTensor(node->cell_gate_bias()); + const Tensor *output_gate_bias = helper.getInputTensor(node->output_gate_bias()); + const Tensor *projection_weights = helper.getOptionalInputTensor(node->projection_weights()); + const Tensor *projection_bias = helper.getOptionalInputTensor(node->projection_bias()); + const Tensor *output_state = helper.getInputTensor(node->output_state()); + const Tensor *cell_state = helper.getInputTensor(node->cell_state()); + const Tensor *input_layer_norm_coefficients = + helper.getOptionalInputTensor(node->input_layer_norm_coefficients()); + const Tensor *forget_layer_norm_coefficients = + helper.getOptionalInputTensor(node->forget_layer_norm_coefficients()); + const Tensor *cell_layer_norm_coefficients = + helper.getOptionalInputTensor(node->cell_layer_norm_coefficients()); + const Tensor *output_layer_norm_coefficients = + helper.getOptionalInputTensor(node->output_layer_norm_coefficients()); + Tensor *output = helper.getOutputTensor(node); + + // scratch pad tensor + // NOTE provide more scratch pads if support hybrid or integer + auto sp_output_state = + std::make_unique<Tensor>(output_state->element_type(), Shape({}), AffineQuantization{}, ""); + sp_output_state->set_observable(false); + sp_output_state->set_data_buffer(nullptr); + Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_output_state)); + + auto sp_cell_state = + std::make_unique<Tensor>(cell_state->element_type(), Shape({}), AffineQuantization{}, ""); + 
sp_cell_state->set_observable(false); + sp_cell_state->set_data_buffer(nullptr); + Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_cell_state)); + + auto sp_3 = std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); + sp_3->set_observable(false); + sp_3->set_data_buffer(nullptr); + Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_3)); + + UnidirectionalSequenceLSTMParams params{}; + params.activation = node->fusedActivationFunction(); + params.cell_clip = node->cell_clip(); + params.proj_clip = node->proj_clip(); + params.time_major = node->time_major(); + params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs(); + + return std::make_unique<kernels::UnidirectionalSequenceLSTM>( + input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, + input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights, + recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights, + cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias, + cell_gate_bias, output_gate_bias, projection_weights, projection_bias, output_state, cell_state, + input_layer_norm_coefficients, forget_layer_norm_coefficients, cell_layer_norm_coefficients, + output_layer_norm_coefficients, output, tmp_1, tmp_2, tmp_3, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp new file mode 100644 index 000000000..a1c0d323a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Unpack.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); + assert(node->arity() == 1); + assert(output_nodes.size() == static_cast<size_t>(node->num())); + + const Tensor *input = helper.getInputTensor(node->value()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + UnpackParams params{}; + params.axis = node->axis(); + + // NOTE 'num' attribute is ignored. + return std::make_unique<kernels::Unpack>(input, std::move(outputs), params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp new file mode 100644 index 000000000..8fde6ec8a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/While.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node); + + auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node); + assert(node->arity() == node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph()); + RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph()); + + return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph, + body_graph); +} + +} // namespace luci_interpreter |