Diffstat (limited to 'runtime/onert/backend')
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.cc | 880
-rw-r--r--  runtime/onert/backend/acl_common/AclKernelGen.h | 315
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.cc | 856
-rw-r--r--  runtime/onert/backend/cpu/Backend.h | 10
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.h | 58
-rw-r--r--  runtime/onert/backend/cpu/CMakeLists.txt | 4
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.cc | 35
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.h | 9
-rw-r--r--  runtime/onert/backend/cpu/ExternalContext.h | 64
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 622
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.h | 13
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.cc | 106
-rw-r--r--  runtime/onert/backend/cpu/StaticTensorManager.h | 64
-rw-r--r--  runtime/onert/backend/cpu/Tensor.h | 18
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.cc | 20
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.h | 13
-rw-r--r--  runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc | 83
-rw-r--r--  runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h | 59
-rw-r--r--  runtime/onert/backend/cpu/ops/CompareLayer.cc | 238
-rw-r--r--  runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc | 85
-rw-r--r--  runtime/onert/backend/cpu/ops/FullyConnectedLayer.h | 9
-rw-r--r--  runtime/onert/backend/cpu/ops/L2NormLayer.cc | 71
-rw-r--r--  runtime/onert/backend/cpu/ops/L2NormLayer.h | 55
-rw-r--r--  runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc | 4
-rw-r--r--  runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h | 7
-rw-r--r--  runtime/onert/backend/cpu/ops/OperationUtils.h | 11
-rw-r--r--  runtime/onert/backend/cpu/ops/PadLayer.cc | 25
-rw-r--r--  runtime/onert/backend/cpu/ops/PadLayer.h | 8
-rw-r--r--  runtime/onert/backend/cpu/ops/QuantizeLayer.cc | 63
-rw-r--r--  runtime/onert/backend/cpu/ops/QuantizeLayer.h | 56
-rw-r--r--  runtime/onert/backend/cpu/ops/ReLU6Layer.cc | 74
-rw-r--r--  runtime/onert/backend/cpu/ops/ReLU6Layer.h | 57
-rw-r--r--  runtime/onert/backend/cpu/ops/ReduceLayer.cc | 38
-rw-r--r--  runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc | 87
-rw-r--r--  runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h | 58
-rw-r--r--  runtime/onert/backend/cpu/ops/SliceLayer.cc | 16
-rw-r--r--  runtime/onert/backend/cpu/ops/SliceLayer.h | 3
-rw-r--r--  runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc | 74
-rw-r--r--  runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h | 54
-rw-r--r--  runtime/onert/backend/cpu/ops/SplitVLayer.cc | 99
-rw-r--r--  runtime/onert/backend/cpu/ops/SplitVLayer.h | 60
-rw-r--r--  runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc | 68
-rw-r--r--  runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h | 59
43 files changed, 2986 insertions(+), 1622 deletions(-)
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index 3ca405899..a84f983b4 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -31,6 +31,7 @@
#include "exec/FunctionSequence.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
@@ -76,15 +77,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -96,15 +97,27 @@ void KernelGenerator::visit(const ir::operation::Cast &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::CLCopy>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::CLCast>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ // TODO Support converting float to int32 as round down
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclClFunction(std::move(fn));
@@ -132,10 +145,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -143,8 +156,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
@@ -171,10 +185,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -182,8 +196,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclClFunction(std::move(fn));
}
@@ -191,88 +205,28 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
@@ -296,7 +250,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
@@ -305,7 +259,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
@@ -313,10 +267,10 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
@@ -327,75 +281,15 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::CLFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::CLFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::CLFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Mul &node)
@@ -406,17 +300,18 @@ void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -427,14 +322,14 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
@@ -443,7 +338,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), acl_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
@@ -453,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims,
+ l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
@@ -469,13 +364,13 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
@@ -483,7 +378,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -503,10 +398,10 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
@@ -516,15 +411,15 @@ void KernelGenerator::visit(const ir::operation::Tanh &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -538,13 +433,13 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -558,10 +453,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
@@ -613,7 +508,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto fn = std::make_unique<::arm_compute::CLSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -628,10 +523,10 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
@@ -704,7 +599,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -720,10 +615,10 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
// Reversed
@@ -732,7 +627,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto fn = std::make_unique<::arm_compute::CLPermute>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -747,17 +642,18 @@ void KernelGenerator::visit(const ir::operation::Add &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Sub &node)
@@ -768,17 +664,18 @@ void KernelGenerator::visit(const ir::operation::Sub &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
@@ -789,16 +686,17 @@ void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Exp &node)
@@ -806,12 +704,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -823,12 +721,12 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -842,20 +740,21 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Logistic &node)
@@ -863,15 +762,15 @@ void KernelGenerator::visit(const ir::operation::Logistic &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -884,13 +783,13 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
::arm_compute::BinaryLogicalOperation::AND);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -900,159 +799,8 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::CLLSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1063,13 +811,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1107,13 +855,13 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- orig_inputs_acl_tensor_shapes.emplace_back(input_alloc->info()->tensor_shape());
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
@@ -1135,8 +883,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -1149,7 +897,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
@@ -1160,7 +908,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
@@ -1168,7 +916,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
@@ -1183,12 +931,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclClFunction(std::move(fn));
}
@@ -1198,15 +946,15 @@ void KernelGenerator::visit(const ir::operation::ReLU &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1219,12 +967,12 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
@@ -1238,15 +986,15 @@ void KernelGenerator::visit(const ir::operation::ReLU1 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1258,15 +1006,15 @@ void KernelGenerator::visit(const ir::operation::ReLU6 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1288,25 +1036,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclClFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
@@ -1315,12 +1063,12 @@ void KernelGenerator::visit(const ir::operation::Floor &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1335,10 +1083,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
@@ -1346,8 +1094,8 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
std::unique_ptr<::arm_compute::IFunction> fn;
auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
fn = std::move(l);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1362,12 +1110,12 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>();
+ auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1376,32 +1124,15 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1410,13 +1141,13 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1442,15 +1173,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1466,17 +1197,17 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1489,13 +1220,13 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
- auto fn = std::make_unique<::arm_compute::CLPReLU>();
+ auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
- fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1518,7 +1249,6 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
(node.param().padding.type == ir::PaddingType::VALID));
auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
ker_shape.W, ker_shape.H);
-
uint32_t invalid_horizontal = 0;
uint32_t invalid_vertical = 0;
if (node.param().padding.type == ir::PaddingType::VALID)
@@ -1528,17 +1258,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1550,15 +1280,15 @@ void KernelGenerator::visit(const ir::operation::SQRT &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1571,13 +1301,13 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node)
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1589,12 +1319,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1607,13 +1337,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1634,13 +1364,13 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_alloc = _tensor_builder->at(outputValues_index).get();
- auto indices_alloc = _tensor_builder->at(outputIndices_index).get();
- auto input_alloc = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_builder->at(outputValues_index).get();
+ auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
+ auto input_tensor = _tensor_builder->at(inputData_index).get();
auto fn = std::make_unique<::arm_compute::CLTopKV2>();
- fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle());
+ fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1659,9 +1389,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1671,43 +1401,43 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- const auto backend_layout = ofm_alloc->layout();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::CLGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- const auto orig_ifm_acl_tensor_shape = ifm_alloc->info()->tensor_shape();
- if (n != ifm_alloc->info()->num_dimensions())
+ const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- const auto orig_indice_acl_tensor_shape = indices_alloc->info()->tensor_shape();
- if (k != indices_alloc->info()->num_dimensions())
+ const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and indices tensor is applied dim_correction
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
- ifm_alloc->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
- indices_alloc->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
+ ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
+ indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1719,12 +1449,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLNeg>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1736,15 +1466,15 @@ void KernelGenerator::visit(const ir::operation::Abs &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1761,11 +1491,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
@@ -1776,10 +1506,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgOperation>();
+ auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis},
- ::arm_compute::ArgOperation::MAX);
+ fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1791,12 +1521,12 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE);
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1814,15 +1544,15 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1837,12 +1567,12 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLDepthToSpace>();
+ auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -1860,13 +1590,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ICLTensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
@@ -1874,7 +1604,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
auto fn = std::make_unique<::arm_compute::CLSplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclClFunction(std::move(fn));
}
@@ -1906,13 +1636,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
@@ -1959,12 +1689,12 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
@@ -1982,13 +1712,13 @@ void KernelGenerator::visit(const ir::operation::Min &node)
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -2001,13 +1731,13 @@ void KernelGenerator::visit(const ir::operation::Max &node)
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
@@ -2019,12 +1749,12 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
@@ -2037,12 +1767,12 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
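
The acl_cl hunks above combine a mechanical *_alloc -> *_tensor rename with a handful of ACL class migrations (CLSpaceToDepth -> CLSpaceToDepthLayer, CLPReLU -> CLPReluLayer, CLArgOperation -> CLArgMinMaxLayer, CLDepthToSpace -> CLDepthToSpaceLayer, and CLCast -> CLDequantizationLayer for Dequantize). For reference, a minimal sketch of the visit() shape these hunks converge on; it assumes the member and helper names used throughout this file, and the final _return_fn lines are assumed, since the hunks above are cut before them.

// Minimal sketch, not part of the patch: the common post-rename visit() pattern in acl_cl.
void KernelGenerator::visit(const ir::operation::Neg &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};

  // Look up backend tensors by operand index (these were the *_alloc locals before this patch).
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  // Configure the ACL function on the raw CL tensor handles.
  auto fn = std::make_unique<::arm_compute::CLNeg>();
  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());

  // Wrap it as an onert IFunction; ops with a fused activation additionally compose an
  // ActivationBuilder function via exec::FunctionSequence (see the L2Pool2D hunk).
  auto acl_fn = asAclClFunction(std::move(fn));
  _return_fn = std::move(acl_fn);
}
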
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
new file mode 100644
index 000000000..9f7ce3764
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+#define __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+
+#include <exec/IFunction.h>
+#include <ir/Operands.h>
+
+#include <ir/operation/LSTM.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+{
+ // TODO Support dynamic rnn
+ // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto cell_threshold = node.param().cell_threshold;
+ const auto projection_threshold = node.param().projection_threshold;
+
+ bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // true: no CIFG
+ // false: CIFG
+ // NOTE The cell_to_input_weights does not exist in a non-peephole LSTM, even in a regular (non-CIFG) one.
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole LSTM.
+ // But the cell_to_input_weights does not exist in CIFG LSTM even when peephole connections are used.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE Although the projection weights have data, the projection bias may not.
+ bool has_projection_param = has_projection_weights;
+
+ const auto activation = node.param().activation;
+ const auto cell_clip = cell_threshold;
+ const auto projection_clip = projection_threshold;
+ assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+ auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
+ auto output_tensor = tensor_builder->at(output_index).get();
+
+ auto input_tensor = tensor_builder->at(input_index).get();
+
+ auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto recurrent_to_forget_weights_tensor =
+ tensor_builder->at(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ auto recurrent_to_output_weights_tensor =
+ tensor_builder->at(recurrent_to_output_weights_index).get();
+
+ auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
+ if (has_cifg_param)
+ {
+ auto input_to_input_weights_tensor =
+ tensor_builder->at(input_to_input_weights_index).get(); // optional
+ auto recurrent_to_input_weights_tensor =
+ tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ auto cell_to_input_weights_handle =
+ has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
+ recurrent_to_input_weights_tensor->handle(),
+ cell_to_input_weights_handle, input_gate_bias_tensor->handle());
+ }
+ if (has_peephole_param)
+ {
+ auto cell_to_forget_weights_tensor =
+ tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ auto cell_to_output_weights_tensor =
+ tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
+ cell_to_output_weights_tensor->handle());
+ }
+ if (has_projection_param)
+ {
+ auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? tensor_builder->at(projection_bias_index).get()->handle()
+ : nullptr; // optional
+ lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
+ }
+
+ fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(),
+ recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
+ cell_clip, projection_clip);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+ const auto input_rank = operands.at(input_index).shape().rank();
+
+ const auto output_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ UNUSED_RELEASE(output_size);
+ assert(operands.at(bias_index).shape().dim(0) == output_size);
+ assert(operands.at(weight_index).shape().dim(0) == output_size);
+ const auto batch_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ const auto input_size =
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+
+ // Check for reshaping input's shape into rank-2
+ bool needs_reshape = false;
+ ir::Shape reshape(2);
+ if (input_rank == 3 || input_rank == 4)
+ {
+ const auto &ifm_shape = operands.at(input_index).shape();
+ auto feature_size = 1;
+ for (int i = 0; i < ifm_shape.rank(); ++i)
+ {
+ feature_size *= ifm_shape.dim(i);
+ }
+
+ UNUSED_RELEASE(feature_size);
+ assert(feature_size == batch_size * input_size);
+
+ // for reshaping
+ needs_reshape = true;
+ reshape.dim(0) = batch_size; /* H */
+ reshape.dim(1) = input_size; /* W */
+ }
+
+ auto output_tensor = tensor_builder->at(output_index).get();
+ const auto input_tensor = tensor_builder->at(input_index).get();
+ const auto weight_tensor = tensor_builder->at(weight_index).get();
+ const auto bias_tensor = tensor_builder->at(bias_index).get();
+ const auto frontend_layout = layout;
+ const auto acl_layout = output_tensor->handle()->info()->data_layout();
+
+ auto fn =
+ std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
+ if (operands.at(weight_index).isConstant())
+ {
+ kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
+ assert(operands.at(weight_index).data());
+ }
+
+ fn->configure(
+ input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
+ output_tensor->handle(), needs_reshape,
+ ::onert::backend::acl_common::asTensorShape(
+ reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
+ kernel_type);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+std::unique_ptr<::arm_compute::IFunction>
+kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ ::arm_compute::PoolingType pooling_type)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(0)};
+
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(layout);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+
+ VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
+ VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
+
+ auto ofm_tensor = tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = tensor_builder->at(ifm_index).get();
+
+ ::arm_compute::PoolingLayerInfo info{
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+
+ return fn;
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
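
The new header folds three code paths that were previously duplicated per backend into templates parameterized by the function wrapper, tensor interface, ACL layer, and tensor builder types. The rank-3/4 branch in kernelGenFullyConnected flattens the input to a batch_size x input_size matrix; with illustrative numbers, an input of shape [1, 4, 5], a [10, 20] weight and a [1, 10] output give feature_size = 1*4*5 = 20 = batch_size (1) * input_size (20), so ACL sees a [1, 20] input. Below is a hedged sketch of how the backends instantiate these helpers: the pooling and FullyConnected calls mirror hunks elsewhere in this patch, while the LSTM line is an assumption (its call sites are outside this excerpt) and its wrapper type name is a placeholder.

// Sketch only; node, _ctx, _tensor_builder and _current_op_seq_layout are the
// KernelGenerator members seen throughout this patch.

// acl_neon pooling: one shared path, the PoolingType argument selects MAX / AVG / L2.
auto pool_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
    node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);

// acl_neon FullyConnected: NEON wrapper / tensor / reshaping-layer types.
auto fc_fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
                                                 ::arm_compute::NEFullyConnectedReshapingLayer>(
    node, _ctx, _tensor_builder, _current_op_seq_layout);

// LSTM on the CL side (assumed): same template with CL tensor and layer types.
auto lstm_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
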
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index e47186754..1195b83cc 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -31,6 +31,7 @@
#include "exec/NopFunction.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
@@ -74,15 +75,15 @@ void KernelGenerator::visit(const ir::operation::Abs &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -96,10 +97,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
@@ -112,7 +113,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclFunction(std::move(fn));
@@ -127,15 +128,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -147,15 +148,26 @@ void KernelGenerator::visit(const ir::operation::Cast &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NECast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::NECopy>();
- auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::NECast>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclFunction(std::move(fn));
@@ -183,10 +195,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -194,8 +206,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -208,12 +221,12 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
@@ -242,10 +255,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
@@ -253,8 +266,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -265,12 +278,12 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -279,88 +292,28 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node)
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
@@ -383,7 +336,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
@@ -392,7 +345,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
@@ -400,10 +353,10 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
@@ -418,13 +371,13 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -436,12 +389,12 @@ void KernelGenerator::visit(const ir::operation::Floor &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -450,76 +403,15 @@ void KernelGenerator::visit(const ir::operation::Floor &node)
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NEFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
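
The hunk above folds the backend-local FullyConnected lowering (the rank check, the optional rank-2 reshape and the constant-weight kernel selection) into a shared acl_common helper, so the NEON and CL kernel generators now differ only in the template arguments they pass. The standalone sketch below illustrates that pattern only; the helper name kernelGenSketch, NodeT/TensorBuilderT and the output()/input()/weight()/bias() accessors are illustrative stand-ins, not the real acl_common interface. The same extraction is applied to L2Pool2D and LSTM further down.

#include <memory>
#include <utility>

// Minimal sketch of a shared kernel-generation template: the backend supplies its
// own function wrapper and ACL layer type; the body is identical for CL and NEON.
template <typename FunctionT, typename LayerT, typename NodeT, typename TensorBuilderT>
std::unique_ptr<FunctionT> kernelGenSketch(const NodeT &node, TensorBuilderT &tensor_builder)
{
  // Both backends resolve their operand tensors the same way.
  auto output = tensor_builder.at(node.output()).get();
  auto input = tensor_builder.at(node.input()).get();
  auto weight = tensor_builder.at(node.weight()).get();
  auto bias = tensor_builder.at(node.bias()).get();

  // Only the concrete ACL layer type differs between acl_cl and acl_neon.
  auto layer = std::make_unique<LayerT>();
  layer->configure(input->handle(), weight->handle(), bias->handle(), output->handle());
  return std::make_unique<FunctionT>(std::move(layer));
}

In the hunk itself the real helper is instantiated as kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>, and the fused activation is still appended by the caller.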
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -531,17 +423,17 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -561,10 +453,10 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
- const auto backend_layout = ofm_alloc->layout();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
@@ -575,35 +467,35 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::NEGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- if (n != ifm_alloc->info()->num_dimensions())
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that the higher dimensions' values are 1 and dim_correction has been applied to the ifm tensor
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- if (k != indices_alloc->info()->num_dimensions())
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that the higher dimensions' values are 1 and dim_correction has been applied to the indices tensor
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// acl_neon does not revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
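
For context on the two guards above: ACL's dim_correction collapses trailing dimensions of size 1 in a TensorInfo, so when the IR rank and the ACL rank disagree the shape is re-set from the IR shape with dim_correction disabled, letting NEGatherEx see the tensor at its full rank; per the note above, acl_neon does not revert this afterwards. The same guard recurs below for Pack, Softmax and Unpack. A standalone sketch of the idea follows; restoreFullRank and its parameters are hypothetical, not onert code.

// Hypothetical helper (illustration only): re-expand a tensor whose trailing
// size-1 dimensions were collapsed by ACL's dim_correction.
template <typename AclTensorT, typename IrShapeT, typename AsAclShapeFn>
void restoreFullRank(AclTensorT *tensor, const IrShapeT &ir_shape, size_t ir_rank,
                     AsAclShapeFn as_acl_shape)
{
  if (ir_rank != tensor->info()->num_dimensions())
  {
    // dim_correction was applied; rebuild the ACL shape with it disabled.
    tensor->info()->set_tensor_shape(as_acl_shape(ir_shape, /*apply_dim_correction=*/false));
  }
}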
@@ -621,20 +513,20 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -656,15 +548,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -673,32 +565,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
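
Note the division of labour the replacement keeps: kernelGenPool2D builds the padding, the PoolingLayerInfo and the NEPoolingLayer (everything the deleted lines did inline) and hands back the still-unwrapped result as raw_fn, so the visitor can wrap it with asAclFunction and chain the fused activation behind it in a FunctionSequence. The sketch below only illustrates that chaining; IFnSketch and FunctionSequenceSketch are stand-ins, not onert's exec::IFunction or exec::FunctionSequence.

#include <memory>
#include <vector>

// Illustrative stand-ins showing why the kernel and the optional fused activation
// are composed as a sequence of runnable steps.
struct IFnSketch
{
  virtual ~IFnSketch() = default;
  virtual void run() = 0;
};

struct FunctionSequenceSketch : IFnSketch
{
  std::vector<std::unique_ptr<IFnSketch>> steps;
  void run() override
  {
    for (auto &step : steps)
      if (step) // the activation slot may be empty when no activation is fused
        step->run();
  }
};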
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -712,15 +587,15 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -733,13 +608,13 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -751,12 +626,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -769,13 +644,13 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node)
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -787,8 +662,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
@@ -798,7 +673,7 @@ void KernelGenerator::visit(const ir::operation::Logistic &node)
// instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -807,159 +682,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node)
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::NELSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
}
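
The deleted block above determined the LSTM variant from which optional operands actually carry data: absent input-to-input and recurrent-to-input weights mean CIFG, present cell-to-forget and cell-to-output weights mean peephole, and present projection weights mean projection (the projection bias may still be absent). kernelGenLSTM in acl_common presumably reproduces those checks; the condensed sketch below restates them with illustrative names, not the real helper's signature.

// Condensed sketch of the optional-input detection (illustrative names only).
struct LstmVariantSketch
{
  bool cifg;       // CIFG: input and forget gates are coupled (no input-gate weights)
  bool peephole;   // cell-to-forget/output weights are present
  bool projection; // projection weights are present; the bias may still be absent
};

template <typename OperandsT, typename IdxT>
LstmVariantSketch detectLstmVariant(const OperandsT &ctx, IdxT in2in_w, IdxT rec2in_w,
                                    IdxT cell2fgt_w, IdxT cell2out_w, IdxT proj_w)
{
  const bool has_in2in =
    ctx.at(in2in_w).shape().dim(0) != 0 && ctx.at(in2in_w).shape().dim(1) != 0;
  const bool has_rec2in =
    ctx.at(rec2in_w).shape().dim(0) != 0 && ctx.at(rec2in_w).shape().dim(1) != 0;
  const bool has_peephole =
    ctx.at(cell2fgt_w).shape().dim(0) != 0 && ctx.at(cell2out_w).shape().dim(0) != 0;
  const bool has_projection =
    ctx.at(proj_w).shape().dim(0) != 0 && ctx.at(proj_w).shape().dim(1) != 0;
  return LstmVariantSketch{!(has_in2in && has_rec2in), has_peephole, has_projection};
}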
void KernelGenerator::visit(const ir::operation::Mul &node)
@@ -970,18 +694,18 @@ void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
// For scale 1.0, only RoundingPolicy::TO_ZERO is allowed
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Neg &node)
@@ -989,12 +713,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NENegLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1030,12 +754,12 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that the higher dimensions' values are 1 and dim_correction has been applied to the input tensor
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
@@ -1094,8 +818,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -1108,7 +832,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
@@ -1119,7 +843,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
@@ -1127,7 +851,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
@@ -1143,15 +867,15 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
std::unique_ptr<::arm_compute::IFunction> fn;
- auto l = std::make_unique<::arm_compute::NEPReLU>();
+ auto l = std::make_unique<::arm_compute::NEPReluLayer>();
- l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
@@ -1166,14 +890,14 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
@@ -1182,11 +906,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- // NOTE NEReduceMean has a bug that does not support NHWC layout
- // NEReduceMean intermediate tensors are always NCHW layout
- auto l = std::make_unique<::arm_compute::NEReduceMeanEx>();
+ auto l = std::make_unique<::arm_compute::NEReduceMean>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
@@ -1194,7 +916,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
{
auto l = std::make_unique<::arm_compute::NEReduceSum>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
@@ -1202,7 +924,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
{
auto l = std::make_unique<::arm_compute::NEReduceOperation>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle(),
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
@@ -1218,15 +940,15 @@ void KernelGenerator::visit(const ir::operation::ReLU &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1238,15 +960,15 @@ void KernelGenerator::visit(const ir::operation::ReLU1 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1258,15 +980,15 @@ void KernelGenerator::visit(const ir::operation::ReLU6 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1278,13 +1000,13 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
@@ -1292,7 +1014,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1305,12 +1027,12 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
@@ -1334,25 +1056,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::NERNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1361,12 +1083,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
@@ -1383,10 +1105,10 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
@@ -1396,15 +1118,15 @@ void KernelGenerator::visit(const ir::operation::Tanh &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1417,13 +1139,25 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = input_tensor->layout();
+
+ // Disable applied dim_correction
+ const size_t input_rank = _ctx.at(input_index).shape().rank();
+ if (input_rank != input_tensor->info()->num_dimensions())
+ {
+ // This means that the higher dimensions' values are 1 and dim_correction has been applied to the input tensor
+ const auto input = _ctx.at(input_index);
+ input_tensor->info()->set_tensor_shape(
+ acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
+ }
auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1438,20 +1172,18 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is
- // not 0.
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1465,12 +1197,12 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1489,13 +1221,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ITensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
@@ -1503,7 +1235,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
auto fn = std::make_unique<::arm_compute::NESplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1513,15 +1245,15 @@ void KernelGenerator::visit(const ir::operation::SQRT &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1534,13 +1266,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1555,17 +1287,17 @@ void KernelGenerator::visit(const ir::operation::Sub &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1575,10 +1307,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
@@ -1628,7 +1360,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
auto fn = std::make_unique<::arm_compute::NESlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1643,10 +1375,10 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
@@ -1715,7 +1447,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1749,16 +1481,16 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1771,10 +1503,10 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
@@ -1783,11 +1515,11 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
+ if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
auto l = std::make_unique<::arm_compute::NETranspose>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
@@ -1795,7 +1527,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
{
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
fn = std::move(l);
}
@@ -1834,13 +1566,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
// This means that the higher dimensions' values are 1 and dim_correction has been applied to the output tensor
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
@@ -1858,17 +1590,17 @@ void KernelGenerator::visit(const ir::operation::Add &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
@@ -1879,16 +1611,16 @@ void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Exp &node)
@@ -1896,12 +1628,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1913,12 +1645,12 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1933,13 +1665,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclFunction(std::move(fn));
@@ -1953,13 +1685,13 @@ void KernelGenerator::visit(const ir::operation::Min &node)
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
@@ -1972,13 +1704,13 @@ void KernelGenerator::visit(const ir::operation::Max &node)
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 2daf06aca..56bd352e0 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_BACKEND_H__
#define __ONERT_BACKEND_CPU_BACKEND_H__
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
@@ -39,9 +40,9 @@ public:
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
@@ -49,7 +50,8 @@ public:
auto tb = std::make_shared<TensorBuilder>();
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
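
The hunk above spells out the return type of newContext and hands the context's ExternalContext to the KernelGenerator. A minimal caller-side sketch of what that buys, assuming `backend`, `graph` and `kb` objects already exist (hypothetical names, not part of the patch):

// Sketch only: obtain the cpu backend context and its shared ExternalContext.
std::unique_ptr<onert::backend::BackendContext> base_ctx =
    backend.newContext(graph, kb, false);
// The cpu backend constructs its own BackendContext subclass, so this downcast
// is assumed to be safe here (the base API does not guarantee it).
auto *cpu_ctx = static_cast<onert::backend::cpu::BackendContext *>(base_ctx.get());
auto external = cpu_ctx->external_context(); // same object the KernelGenerator received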
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
new file mode 100644
index 000000000..f314a8e39
--- /dev/null
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
+ std::shared_ptr<ITensorRegister> tensor_register = nullptr,
+ std::shared_ptr<IOptimizer> optimizer = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
+ kernel_gen, tensor_register, optimizer),
+ _external_context(new ExternalContext)
+ {
+ }
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ // NOTE A ruy context owns a thread pool, so creating multiple ruy contexts
+ // also duplicates the thread pool
+ // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
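
The NOTE/TODO above points out that every BackendContext builds its own ExternalContext and therefore its own ruy thread pool. A minimal sketch of that consequence, assuming `backend` and `graph` objects exist (hypothetical names):

// Sketch only: two backend contexts currently mean two independent ruy thread pools.
#include <cassert>
#include "BackendContext.h"

onert::backend::cpu::BackendContext ctx_a{&backend, &graph};
onert::backend::cpu::BackendContext ctx_b{&backend, &graph};
assert(ctx_a.external_context()->ruy_context() !=
       ctx_b.external_context()->ruy_context()); // duplicated pools; see the TODO above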
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index e997a2291..01a3cd178 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -1,5 +1,7 @@
set(LIB_ONERT_BACKEND_CPU onert_backend_cpu)
+nnfw_find_package(Ruy REQUIRED)
+
file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
@@ -8,6 +10,8 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index 71e313628..deb27f0fe 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -15,6 +15,7 @@
*/
#include "ConstantInitializer.h"
+#include "Tensor.h"
namespace onert
{
@@ -30,39 +31,61 @@ ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
// DO NOTHING
}
+void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ registerExternalInitializer(index, obj);
+}
+
+void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // Applies to CONSTANT operands only
+ // TODO Add a check that the tensor has been allocated
+ if (!obj.isConstant())
+ return;
+
+ _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
+ auto data = model_obj.shareData();
+ assert(data && data->base());
+ ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
+ tensor.setData(data);
+ };
+}
+
void ConstantInitializer::visit(const ir::operation::Conv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
{
const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
+ registerExternalInitializer(weight_index, weight_obj);
const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
if (!bias_index.undefined())
{
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
}
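
With registerExternalInitializer, constant weights and biases are no longer copied into backend-owned memory; the registered lambda just attaches the operand's shared data to an ExternalTensor. A minimal sketch of invoking one such _init_map entry, assuming `init_map`, `index`, `operand` and `tensor` are already in scope (hypothetical names):

// Sketch only: run one registered external initializer.
// `tensor` must actually be an ExternalTensor; otherwise the dynamic_cast to a
// reference inside the lambda above throws std::bad_cast.
auto &init_fn = init_map.at(index); // the lambda registered above
init_fn(operand, tensor);           // tensor now shares operand.shareData(); no copy is made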
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index bd06c64d1..de03a693a 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -36,6 +36,15 @@ public:
const std::shared_ptr<TensorBuilder> &tensor_builder);
public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+ // TODO: For now, only the cpu backend supports constant tensors backed by external data.
+ // If other backends add support (ExternalTensor would probably need to be
+ // abstracted, e.g. as an IExternal interface), this could become an
+ // interface of IConstantInitializer
+ void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+public:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
new file mode 100644
index 000000000..6627412d2
--- /dev/null
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+
+#include <backend/IExternalContext.h>
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class ExternalContext : public IExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+#ifdef USE_RUY_GEMV
+ _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul;
+#endif
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->max_num_threads = target_num_threads;
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
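
ExternalContext reads the RUY_THREADS config once at construction, falls back to a single thread when that value is negative, and exposes the single ruy::Context that ruy-backed kernels (such as the FullyConnectedLayer change below) run on. A minimal usage sketch; the thread count 4 is illustrative only:

// Sketch only: one ExternalContext per backend context configures the shared ruy::Context.
#include "ExternalContext.h"

onert::backend::cpu::ExternalContext external;
external.setMaxNumThreads(4);                   // override RUY_THREADS at runtime if needed
ruy::Context *ruy_ctx = external.ruy_context(); // handed to the ruy/cker GEMM paths
(void)ruy_ctx;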
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 72f960675..7939fe894 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -20,6 +20,7 @@
#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/AvgPoolLayer.h"
+#include "ops/BatchToSpaceNDLayer.h"
#include "ops/CastLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
@@ -49,7 +50,9 @@
#include "ops/RangeLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReLULayer.h"
+#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
+#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoundLayer.h"
#include "ops/RsqrtLayer.h"
@@ -60,7 +63,9 @@
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
+#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
+#include "ops/SplitVLayer.h"
#include "ops/SubLayer.h"
#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
@@ -70,11 +75,14 @@
#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/LogicalOrLayer.h"
+#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
+#include "ops/QuantizeLayer.h"
+#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
#include <backend/IConfig.h>
@@ -119,9 +127,11 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder)
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
+ _external_context(external_context)
{
// DO NOTHING
}
@@ -184,10 +194,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
@@ -196,9 +206,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, param_padding.param.left,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_alloc);
+ stride.horizontal, stride.vertical, activation, ofm_tensor);
_return_fn = std::move(fn);
return;
@@ -213,9 +223,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto padding =
ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, padding.left, padding.right,
- padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
- ofm_alloc);
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -241,16 +251,16 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
- ofm_alloc);
+ ofm_tensor);
_return_fn = std::move(fn);
}
@@ -270,13 +280,13 @@ void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::MaxPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -295,13 +305,13 @@ void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AvgPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -313,7 +323,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
@@ -321,7 +331,33 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
auto fn = std::make_unique<ops::ConcatLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
+ const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
+
+ auto output_alloc = _tensor_builder->portableAt(output_index).get();
+ auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+
+ auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
+
+ IPortableTensor *crops_alloc = nullptr;
+ const auto NNApiInputs = 2;
+
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
+ crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+ }
+
+ fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
_return_fn = std::move(fn);
}
@@ -332,13 +368,13 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto value_alloc = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto value_tensor = _tensor_builder->portableAt(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_alloc, value_alloc, output_alloc);
+ fn->configure(input_tensor, value_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -353,15 +389,16 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto weight_alloc = _tensor_builder->portableAt(weight_index).get();
- auto bias_alloc =
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto bias_tensor =
bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
- fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
+ _external_context);
_return_fn = std::move(fn);
}
@@ -371,21 +408,21 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// optional 2nd input
- IPortableTensor *shape_alloc = nullptr;
+ IPortableTensor *shape_tensor = nullptr;
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_builder->portableAt(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -394,13 +431,13 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, nullptr, output_alloc);
+ fn->configure(input_tensor, nullptr, output_tensor);
_return_fn = std::move(fn);
}
@@ -412,12 +449,12 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
- fn->configure(input_alloc, beta, output_alloc);
+ fn->configure(input_tensor, beta, output_tensor);
_return_fn = std::move(fn);
}
@@ -430,13 +467,13 @@ void KernelGenerator::visit(const ir::operation::Add &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::AddLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -447,15 +484,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto comparison_type = node.param().comparison_type;
auto fn = std::make_unique<ops::CompareLayer>();
- fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -466,11 +503,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
@@ -481,8 +518,8 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == input_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == input_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
@@ -492,7 +529,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
auto fn = std::make_unique<ops::GatherLayer>();
- fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+ fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
_return_fn = std::move(fn);
}
@@ -506,13 +543,13 @@ void KernelGenerator::visit(const ir::operation::Sub &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SubLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -526,13 +563,13 @@ void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -547,18 +584,18 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
- auto depth_alloc = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_alloc = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_alloc = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
+ auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
+ auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
- assert(indices_alloc->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+ assert(indices_tensor->data_type() == OperandType::INT32);
+ assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
auto fn = std::make_unique<ops::OneHotLayer>();
- fn->configure(indices_alloc, depth_alloc, onvalue_alloc, offvalue_alloc, output_alloc, axis);
+ fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
_return_fn = std::move(fn);
}
@@ -572,13 +609,13 @@ void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::DivLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -587,16 +624,16 @@ void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto equation = node.param().equation;
auto fn = std::make_unique<ops::EinsumLayer>();
- fn->configure(input_allocs, equation, output_alloc);
+ fn->configure(input_tensors, equation, output_tensor);
_return_fn = std::move(fn);
}
@@ -605,14 +642,14 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
{
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
- std::vector<std::shared_ptr<IPortableTensor>> &allocs) {
+ std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_alloc = _tensor_builder->portableAt(idx);
- allocs.emplace_back(in_alloc);
+ auto in_tensor = _tensor_builder->portableAt(idx);
+ tensors.emplace_back(in_tensor);
}
};
@@ -634,12 +671,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ExpLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -650,13 +687,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -666,12 +703,12 @@ void KernelGenerator::visit(const ir::operation::Logistic &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogisticLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -681,12 +718,12 @@ void KernelGenerator::visit(const ir::operation::Tanh &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TanhLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -700,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
assert(-rank <= axis && axis < rank);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
@@ -708,7 +745,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
auto fn = std::make_unique<ops::PackLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
_return_fn = std::move(fn);
}
@@ -722,7 +759,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
@@ -732,7 +769,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
- fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+ fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
_return_fn = std::move(fn);
}
@@ -751,8 +788,16 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto fn = std::make_unique<ops::PadLayer>();
- fn->configure(input, output, pad_base, pad_rank);
+ bool isPadV2 = node.getInputs().size() == 3;
+ const void *value = nullptr;
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
+ value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
+ }
+ fn->configure(input, output, pad_base, pad_rank, value);
_return_fn = std::move(fn);
}
@@ -762,13 +807,13 @@ void KernelGenerator::visit(const ir::operation::Max &node)
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MaxLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -779,13 +824,13 @@ void KernelGenerator::visit(const ir::operation::Min &node)
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MinLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -795,12 +840,12 @@ void KernelGenerator::visit(const ir::operation::Cast &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CastLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -810,12 +855,12 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
- fn->configure(input_alloc, output_alloc, node.param().perm);
+ fn->configure(input_tensor, output_tensor, node.param().perm);
_return_fn = std::move(fn);
}
@@ -827,15 +872,15 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axes_alloc = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
auto fn = std::make_unique<ops::MeanLayer>();
- fn->configure(input_alloc, axes_alloc, output_alloc, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
_return_fn = std::move(fn);
}
@@ -844,7 +889,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
auto fn = std::make_unique<ops::ReduceLayer>();
const auto reduce_type = convertReduceType(node.param().reduce_type);
- fn->configure(input_alloc, axes_alloc, output_alloc, reduce_type, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
_return_fn = std::move(fn);
}
@@ -855,12 +900,27 @@ void KernelGenerator::visit(const ir::operation::ReLU &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ReLULayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ReLU6Layer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -872,14 +932,14 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto condition_alloc = _tensor_builder->portableAt(condition_index).get();
- auto true_alloc = _tensor_builder->portableAt(true_index).get();
- auto false_alloc = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
+ auto true_tensor = _tensor_builder->portableAt(true_index).get();
+ auto false_tensor = _tensor_builder->portableAt(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
- fn->configure(condition_alloc, true_alloc, false_alloc, output_alloc);
+ fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -891,14 +951,14 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto begins_alloc = _tensor_builder->portableAt(begins_index).get();
- auto sizes_alloc = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
+ auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
- fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+ fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -911,11 +971,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto starts_alloc = _tensor_builder->portableAt(starts_index).get();
- auto ends_alloc = _tensor_builder->portableAt(ends_index).get();
- auto strides_alloc = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
+ auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
+ auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -923,7 +983,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
auto fn = std::make_unique<ops::StridedSliceLayer>();
- fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+ fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
end_mask, shrink_axis_mask);
_return_fn = std::move(fn);
@@ -957,12 +1017,12 @@ void KernelGenerator::visit(const ir::operation::Abs &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AbsLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -972,12 +1032,12 @@ void KernelGenerator::visit(const ir::operation::Sin &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::SinLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -987,12 +1047,12 @@ void KernelGenerator::visit(const ir::operation::Cos &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CosLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -1002,12 +1062,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::RsqrtLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -1017,12 +1077,33 @@ void KernelGenerator::visit(const ir::operation::Shape &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
+
+ auto output_height = node.param().height_out;
+ auto output_width = node.param().width_out;
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ResizeBilinearLayer>();
+
+ fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
+ half_pixel_centers);
_return_fn = std::move(fn);
}
@@ -1033,13 +1114,13 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1049,12 +1130,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::NegLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -1066,12 +1147,12 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_alloc, output_alloc, axis, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
_return_fn = std::move(fn);
}
@@ -1082,13 +1163,13 @@ void KernelGenerator::visit(const ir::operation::Pow &node)
const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ir::Activation::NONE, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
@@ -1098,12 +1179,12 @@ void KernelGenerator::visit(const ir::operation::Log &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::LogLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -1113,12 +1194,12 @@ void KernelGenerator::visit(const ir::operation::Round &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::RoundLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1128,12 +1209,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogicalNotLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1144,28 +1225,43 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node)
const auto lhs_index{node.getInputs().at(0)};
const auto rhs_index{node.getInputs().at(1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::LogicalOrLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto input_index{node.getInputs().at(0)};
auto output_alloc = _tensor_builder->portableAt(output_index).get();
auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::L2NormLayer>();
fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ZerosLikeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1176,14 +1272,14 @@ void KernelGenerator::visit(const ir::operation::Range &node)
const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto start_alloc = _tensor_builder->portableAt(start_index).get();
- auto limit_alloc = _tensor_builder->portableAt(limit_index).get();
- auto delta_alloc = _tensor_builder->portableAt(delta_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto start_tensor = _tensor_builder->portableAt(start_index).get();
+ auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
+ auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(start_alloc, limit_alloc, delta_alloc, output_alloc);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1193,13 +1289,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -1209,13 +1305,13 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto multiples_alloc = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
- fn->configure(input_alloc, multiples_alloc, output_alloc);
+ fn->configure(input_tensor, multiples_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1226,14 +1322,14 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto num_lower_alloc = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_alloc = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
+ auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
- fn->configure(input_alloc, num_lower_alloc, num_upper_alloc, output_alloc);
+ fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1243,16 +1339,16 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
auto fn = std::make_unique<ops::BatchMatMulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, adj_x, adj_y, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
_return_fn = std::move(fn);
}
@@ -1262,13 +1358,13 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1277,10 +1373,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
@@ -1288,7 +1384,7 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
auto fn = std::make_unique<ops::FusedBatchNormLayer>();
- fn->configure(input_allocs, epsilon, is_training, data_format, output_alloc);
+ fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
_return_fn = std::move(fn);
}
@@ -1301,12 +1397,12 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
- fn->configure(input_alloc, beta, axis, output_alloc);
+ fn->configure(input_tensor, beta, axis, output_tensor);
_return_fn = std::move(fn);
}
@@ -1318,14 +1414,84 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_shape_alloc = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_alloc = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
+ auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
- fn->configure(input_alloc, block_shape_alloc, padding_alloc, output_alloc);
+ fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Quantize &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::SpaceToDepthLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
+ const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
+
+  auto output_tensor = _tensor_builder->portableAt(output_index).get();
+  auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+  auto seed_tensor = _tensor_builder->portableAt(seed_index).get();
+
+  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
+
+  fn->configure(shape_tensor, seed_tensor, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SplitV &node)
+{
+ const auto num_splits = node.param().num_splits;
+ assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
+ const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
+ const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
+
+ auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
+ auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+
+ std::vector<IPortableTensor *> out_tensors;
+ for (auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+
+ auto fn = std::make_unique<ops::SplitVLayer>();
+
+ fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
_return_fn = std::move(fn);
}
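
As an aside, every visit() added above follows the same pattern: look up the portable tensors from the tensor builder, configure a layer object, and hand it back as the generated function. The following is a minimal standalone sketch of that shape; FakeTensor and NegLayerSketch are hypothetical stand-ins, not onert APIs.

#include <cassert>
#include <memory>
#include <vector>

struct FakeTensor { std::vector<float> data; };

struct NegLayerSketch {
  const FakeTensor *ifm = nullptr;
  FakeTensor *ofm = nullptr;
  void configure(const FakeTensor *in, FakeTensor *out) { ifm = in; ofm = out; }
  void run() {
    ofm->data.clear();
    for (float v : ifm->data) ofm->data.push_back(-v);
  }
};

int main() {
  FakeTensor input{{1.f, -2.f, 3.f}}, output;
  auto fn = std::make_unique<NegLayerSketch>();
  fn->configure(&input, &output);  // mirrors fn->configure(ifm_tensor, ofm_tensor)
  fn->run();                       // later executed by the runtime's function sequence
  assert(output.data[1] == 2.f);
  return 0;
}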
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index d6f4c2825..40c056a96 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
+#include "ExternalContext.h"
#include "TensorBuilder.h"
#include "Tensor.h"
@@ -37,7 +38,8 @@ class KernelGenerator : public IKernelGenerator
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kernel_builder);
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
using IKernelGenerator::visit;
@@ -74,6 +76,7 @@ public:
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
@@ -83,6 +86,7 @@ public:
void visit(const ir::operation::Sin &) override;
void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
@@ -94,13 +98,19 @@ public:
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BroadcastTo &) override;
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::Quantize &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::SplitV &) override;
private:
const ir::Operands &_ctx;
@@ -108,6 +118,7 @@ private:
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
};
} // namespace cpu
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc
new file mode 100644
index 000000000..78c98dabf
--- /dev/null
+++ b/runtime/onert/backend/cpu/StaticTensorManager.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second;
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getITensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ else
+ {
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ _as_constants[ind] = as_const;
+}
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
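
For illustration, here is a standalone sketch of the plan-then-allocate idea behind StaticTensorManager: claimPlan() records how much each non-constant tensor needs, allocate() reserves one backing block, and getBuffer() hands out per-tensor addresses. The real cpu_common::MemoryManager may also reuse released regions; this toy planner (a hypothetical ToyMemoryManager, not an onert class) does not.

#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

class ToyMemoryManager {
public:
  void claimPlan(int index, uint32_t size) { _offsets[index] = _total; _total += size; }
  void allocate() { _arena.resize(_total); }
  uint8_t *getBuffer(int index) { return _arena.data() + _offsets[index]; }
private:
  std::map<int, uint32_t> _offsets;
  uint32_t _total = 0;
  std::vector<uint8_t> _arena;
};

int main() {
  ToyMemoryManager mgr;
  mgr.claimPlan(0, 64);   // planning phase: sizes are recorded, nothing is allocated yet
  mgr.claimPlan(1, 128);
  mgr.allocate();         // like allocateNonconsts(): one backing allocation for all tensors
  std::printf("tensor#1 buffer offset: %td\n", mgr.getBuffer(1) - mgr.getBuffer(0));
  return 0;
}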
diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h
new file mode 100644
index 000000000..2af61e4e7
--- /dev/null
+++ b/runtime/onert/backend/cpu/StaticTensorManager.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+
+#include "backend/IStaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/MemoryManager.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorManager.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class StaticTensorManager : public backend::IStaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h
index 4dd251bd3..20e60260c 100644
--- a/runtime/onert/backend/cpu/Tensor.h
+++ b/runtime/onert/backend/cpu/Tensor.h
@@ -29,15 +29,22 @@ namespace cpu
using Tensor = cpu_common::Tensor;
-// Tensor which has data from external. To support this, assume below things
-// no padding, always NHWC layout, constant tensor and not dynamic
+/**
+ * @brief Tensor that refers to data in external memory not managed by this backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * memory that is already allocated elsewhere, such as an mmapped area, which means
+ * ExternalTensor can wrap any kind of ir::Data.
+ * To support this, the following is assumed: no padding, always NHWC layout,
+ * a constant tensor, and not dynamic.
+ */
class ExternalTensor : public Tensor
{
public:
ExternalTensor() = delete;
public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) : Tensor(info, layout)
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
{
assert(_layout == ir::Layout::NHWC);
assert(_info.isConstant());
@@ -45,6 +52,11 @@ public:
}
public:
+ /**
+   * @brief Set data shared from external memory so that this ExternalTensor is not
+   * allocated on the CPU backend
+   * @param[in] data    data of the Operand to be set
+ */
void setData(const std::shared_ptr<ir::Data> data)
{
assert(data != nullptr);
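
The point of setData() above is that the tensor only keeps a shared handle to memory it neither owns nor copies, so the backend never allocates for it. A standalone illustration of that concept follows; the ExternalBufferView class is hypothetical and only mirrors the idea, not the real ExternalTensor API.

#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>

class ExternalBufferView {
public:
  void setData(std::shared_ptr<const std::vector<uint8_t>> data) {
    assert(data != nullptr);
    _data = std::move(data);  // share ownership; no copy into a backend-managed buffer
  }
  const uint8_t *buffer() const { return _data ? _data->data() : nullptr; }
private:
  std::shared_ptr<const std::vector<uint8_t>> _data;  // e.g. backed by an mmapped model file
};

int main() {
  auto weights = std::make_shared<const std::vector<uint8_t>>(16, 0x7f);
  ExternalBufferView tensor;
  tensor.setData(weights);
  assert(tensor.buffer() == weights->data());  // same address: data is referenced, not copied
  return 0;
}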
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
index 886e8d820..ab8ba5756 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -29,8 +29,8 @@ namespace cpu
TensorBuilder::TensorBuilder()
: _tensor_reg{new cpu_common::TensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
/* empty */
}
@@ -77,11 +77,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
void TensorBuilder::allocate()
{
@@ -99,17 +95,17 @@ std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandInde
return _tensor_reg->getPortableTensor(ind);
}
-bool TensorBuilder::setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
+bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor)
{
- return _tensor_reg->setExternalTensor(ind, tensor);
+ return _tensor_reg->setMigrantTensor(ind, tensor);
}
void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
{
- return _tensor_reg->getManagedTensor(ind);
+ return _tensor_reg->getNativeTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index ba25451ec..617136514 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -18,13 +18,14 @@
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/StaticTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
#include <unordered_map>
namespace onert
@@ -80,17 +81,17 @@ public:
* If not, program will crash with assert or exception.
* @return shared_ptr<Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
+ std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override;
std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
};
diff --git a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc
new file mode 100644
index 000000000..f2f10eb9d
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceNDLayer.h"
+
+#include <cker/operation/BatchToSpaceND.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+BatchToSpaceNDLayer::BatchToSpaceNDLayer()
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void BatchToSpaceNDLayer::batchToSpaceNDGeneric()
+{
+ const int32_t NNapiCrops[]{0, 0, 0, 0};
+ const int32_t *_crops_buffer;
+
+ if (_crops == nullptr)
+ {
+ _crops_buffer = NNapiCrops;
+ }
+ else
+ {
+ _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ }
+ nnfw::cker::BatchToSpaceND<T>(
+ getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+}
+
+void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops)
+{
+ _output = output;
+ _input = input;
+ _block_shape = block_shape;
+ _crops = crops;
+}
+
+void BatchToSpaceNDLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ batchToSpaceNDGeneric<float>();
+ }
+ else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ batchToSpaceNDGeneric<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"NYI"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h
new file mode 100644
index 000000000..6e25b241b
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/BatchToSpaceNDLayer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class BatchToSpaceNDLayer : public ::onert::exec::IFunction
+{
+public:
+ BatchToSpaceNDLayer();
+
+public:
+ template <typename T> void batchToSpaceNDGeneric();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ IPortableTensor *_block_shape;
+ IPortableTensor *_crops;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/CompareLayer.cc b/runtime/onert/backend/cpu/ops/CompareLayer.cc
index f557f3ade..adf902aaf 100644
--- a/runtime/onert/backend/cpu/ops/CompareLayer.cc
+++ b/runtime/onert/backend/cpu/ops/CompareLayer.cc
@@ -17,6 +17,7 @@
#include "OperationUtils.h"
+#include <assert.h>
#include <cker/operation/Comparison.h>
using namespace nnfw::cker;
namespace onert
@@ -34,6 +35,14 @@ namespace
using OpType = onert::ir::operation::Comparison::ComparisonType;
using namespace onert::backend::cpu;
+// Assumes these enum values to be in the order like this
+static_assert(static_cast<int>(OpType::Equal) == 0, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::NotEqual) == 1, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Greater) == 2, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::GreaterEqual) == 3, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Less) == 4, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::LessEqual) == 5, "An OpType value has changed!");
+
template <typename T>
void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
OpType op_type)
@@ -52,95 +61,33 @@ void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
&params.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- if (params.is_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLessWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
+ bool *output_data);
+
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
template <typename T>
@@ -149,94 +96,33 @@ void compareScalar(const IPortableTensor *lhs, const IPortableTensor *rhs, IPort
{
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
- if (requires_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreater(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLess(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualNoScaling(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
+
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
+
} // namespace
CompareLayer::CompareLayer()
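
The CompareLayer rewrite above replaces two large switch statements with enum-indexed function-pointer tables, guarded by static_asserts that pin the enum ordering and by an explicit bounds check. A minimal standalone sketch of that dispatch pattern, using a hypothetical Op enum and comparison functions rather than the cker kernels:

#include <cstdio>
#include <stdexcept>

enum class Op { Equal = 0, NotEqual = 1, Greater = 2 };
static_assert(static_cast<int>(Op::Equal) == 0, "An Op value has changed!");
static_assert(static_cast<int>(Op::NotEqual) == 1, "An Op value has changed!");
static_assert(static_cast<int>(Op::Greater) == 2, "An Op value has changed!");

using CompareFn = bool (*)(int, int);
bool equal_fn(int a, int b) { return a == b; }
bool not_equal_fn(int a, int b) { return a != b; }
bool greater_fn(int a, int b) { return a > b; }

bool compare(Op op, int a, int b) {
  // Table order must match the enum order checked by the static_asserts above
  static const CompareFn fns[] = {equal_fn, not_equal_fn, greater_fn};
  const int index = static_cast<int>(op);
  if (index < 0 || index >= static_cast<int>(sizeof(fns) / sizeof(fns[0])))
    throw std::runtime_error{"Invalid Op"};
  return fns[index](a, b);  // one call site instead of a per-case switch
}

int main() {
  std::printf("%d\n", compare(Op::Greater, 3, 1));  // prints 1
  return 0;
}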
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
index c00be64e5..05da33abf 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -18,6 +18,8 @@
#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
+#include <cker/TensorUtils.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
@@ -31,7 +33,7 @@ namespace ops
FullyConnectedLayer::FullyConnectedLayer()
: _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
_activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _is_hybrid(false)
+ _external_context(nullptr), _is_hybrid(false)
{
// DO NOTHING
}
@@ -102,7 +104,8 @@ void FullyConnectedLayer::fullyConnectedHybrid()
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
@@ -110,31 +113,67 @@ void FullyConnectedLayer::fullyConnectedHybrid()
(_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
: reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
-// TODO Enable calling decrease_ref
-#if 0
if (_cached_weights == nullptr || _is_weights_freed)
return;
- auto weight_tensor = dynamic_cast<const Tensor *>(_weights);
- if (weight_tensor)
+  // Reaching here ('_cached_weights' is not nullptr and '_is_weights_freed' is false) means
+  // that this weight shape satisfies the condition of the ruy kernel's prepack cache.
+  // Once entered, this block is not entered again, except for the case below: a zero-vector input.
+
+  // If the input consists only of zeros, the ruy kernel path is bypassed entirely,
+  // so handle that case here before releasing the cached weights.
+ const int input_size = getTensorShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ return;
+
+ auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
+
+  // This weight tensor could also be used by other ops as a constant tensor.
+  // Therefore, check whether its reference count has already dropped to zero as follows.
+ auto tensor = const_cast<Tensor *>(weight_tensor);
+ if (tensor->buffer() == nullptr) // ref is already 0?
{
- auto tensor = const_cast<Tensor *>(weight_tensor);
+ _is_weights_freed = true;
+ return;
+ }
- tensor->decrease_ref();
- if (tensor->buffer() == nullptr) // ref == 0?
- {
- _is_weights_freed = true;
- }
+ tensor->decrease_ref();
+ if (tensor->buffer() == nullptr) // ref == 0?
+ {
+ _is_weights_freed = true;
}
-#endif // if 0
#endif
}
+void FullyConnectedLayer::fullyConnectedSparseWeight()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+
+ int w0_size = getTensorShape(_weights).Dims(0);
+ const uint16_t *w1_segments = _weights->w1_segments();
+ const uint16_t *w1_indices = _weights->w1_indices();
+
+ nnfw::cker::FullyConnectedSparseWeight(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
+ w1_indices);
+}
+
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
- IPortableTensor *output)
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_weights = weights;
@@ -143,6 +182,7 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _external_context = external_context;
}
void FullyConnectedLayer::run()
@@ -151,6 +191,10 @@ void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
+ else if (_weights->is_sparse())
+ {
+ fullyConnectedSparseWeight();
+ }
else if (_input->data_type() == OperandType::FLOAT32)
{
fullyConnectedFloat32();
@@ -167,7 +211,16 @@ void FullyConnectedLayer::run()
void FullyConnectedLayer::prepare()
{
-#ifdef USE_RUY_GEMV
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
// TODO This is workaround
// The only fc hybrid will use ruy kernel
if (_input->data_type() != OperandType::FLOAT32 ||
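
The new fullyConnectedSparseWeight() path passes w1_segments and w1_indices alongside the nonzero weight values. Assuming a CSR-like layout, where the segments give per-output-row start/end offsets into the nonzero array and the indices give the corresponding input positions (an assumption about the cker kernel, not confirmed by this diff), the accumulation looks like the standalone sketch below.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

void sparse_fc(const std::vector<float> &input, const std::vector<float> &nonzero_weights,
               const std::vector<uint16_t> &w1_segments,  // assumed: row offsets, size = rows + 1
               const std::vector<uint16_t> &w1_indices,   // assumed: input index per nonzero weight
               std::vector<float> &output) {
  for (std::size_t row = 0; row + 1 < w1_segments.size(); ++row) {
    float acc = 0.f;
    for (uint16_t k = w1_segments[row]; k < w1_segments[row + 1]; ++k)
      acc += nonzero_weights[k] * input[w1_indices[k]];  // only stored weights contribute
    output[row] = acc;
  }
}

int main() {
  // 2 outputs, 4 inputs; row 0 uses inputs {0, 2}, row 1 uses input {3}
  std::vector<float> input{1.f, 2.f, 3.f, 4.f}, weights{0.5f, 0.5f, 2.f}, output(2);
  sparse_fc(input, weights, {0, 2, 3}, {0, 2, 3}, output);
  std::printf("%.1f %.1f\n", output[0], output[1]);  // 2.0 8.0
  return 0;
}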
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
index dd5ef2436..f1242677c 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.h
@@ -18,6 +18,7 @@
#define __ONERT_BACKEND_CPU_OPS_FULLYCONNECTEDLAYER_H__
#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
#include "OperationUtils.h"
#include <exec/IFunction.h>
@@ -52,8 +53,11 @@ public:
void fullyConnectedHybrid();
+ void fullyConnectedSparseWeight();
+
void configure(const IPortableTensor *input, const IPortableTensor *weights,
- const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
void run() override;
@@ -68,10 +72,13 @@ private:
ir::Activation _activation;
std::unique_ptr<nnfw::cker::FCTempArena> _temp_arena;
+ std::shared_ptr<ExternalContext> _external_context;
+
bool _is_hybrid;
#ifdef USE_RUY_GEMV
uint8_t *_cached_weights = nullptr; // weights to be cached and a key
+ bool _is_weights_freed = false; // is weights freed?
#endif
};
diff --git a/runtime/onert/backend/cpu/ops/L2NormLayer.cc b/runtime/onert/backend/cpu/ops/L2NormLayer.cc
new file mode 100644
index 000000000..0d99b0586
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/L2NormLayer.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "L2NormLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/L2Normalize.h>
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+void L2NormLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+}
+
+void L2NormLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::L2NormalizeFloat32(
+ getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ nnfw::cker::L2NormParams params;
+ assert(_input->data_offset() == 128);
+ params.input_zero_point = _input->data_offset();
+ nnfw::cker::L2NormalizeQuant8(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ }
+ break;
+
+ default:
+ throw std::runtime_error{"L2Norm: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
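
For reference, the float path above divides each element by the L2 norm of its innermost row, y_i = x_i / sqrt(sum_j x_j^2). A standalone sketch of that computation follows; the epsilon guard is this sketch's own addition to avoid division by zero, not something taken from the cker kernel.

#include <cmath>
#include <cstdio>
#include <vector>

void l2_normalize_row(std::vector<float> &row) {
  float sum_sq = 0.f;
  for (float v : row) sum_sq += v * v;
  const float norm = std::sqrt(sum_sq) + 1e-6f;  // epsilon: assumption, for numerical safety
  for (float &v : row) v /= norm;
}

int main() {
  std::vector<float> row{3.f, 4.f};
  l2_normalize_row(row);
  std::printf("%.3f %.3f\n", row[0], row[1]);  // ~0.600 0.800
  return 0;
}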
diff --git a/runtime/onert/backend/cpu/ops/L2NormLayer.h b/runtime/onert/backend/cpu/ops/L2NormLayer.h
new file mode 100644
index 000000000..63f2d1133
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/L2NormLayer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class L2NormLayer : public ::onert::exec::IFunction
+{
+public:
+ L2NormLayer() : _input(nullptr), _output(nullptr)
+ {
+ // Nothing
+ }
+
+public:
+ void configure(const IPortableTensor *_input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
index d71e325ac..06dde4fc4 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
@@ -49,8 +49,8 @@ void LogSoftMaxLayer::logsoftmaxQuant8()
// NYI
}
-void LogSoftMaxLayer::configure(const Tensor *input, const float beta, const int axis,
- Tensor *output)
+void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output)
{
_input = input;
_output = output;
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
index bc145cea7..ba9deca17 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
@@ -40,13 +40,14 @@ public:
void logsoftmaxQuant8();
- void configure(const Tensor *input, const float beta, const int axis, Tensor *output);
+ void configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output);
void run();
private:
- const Tensor *_input;
- Tensor *_output;
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
float _beta;
int _axis;
diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h
index 8d29374ff..98385521a 100644
--- a/runtime/onert/backend/cpu/ops/OperationUtils.h
+++ b/runtime/onert/backend/cpu/ops/OperationUtils.h
@@ -52,6 +52,17 @@ union DataPtr {
void *v;
};
+union ConstDataPtr {
+ const uint8_t *u8;
+ const int8_t *i8;
+ const uint32_t *u32;
+ const int32_t *i32;
+ const bool *b;
+ const float *f;
+ const int64_t *i64;
+ const void *v;
+};
+
uint32_t getNumberOfDimensions(const IPortableTensor *tensor);
uint32_t getNumberOfElements(const IPortableTensor *tensor);
diff --git a/runtime/onert/backend/cpu/ops/PadLayer.cc b/runtime/onert/backend/cpu/ops/PadLayer.cc
index fcfcf7b5e..6a2bf9da0 100644
--- a/runtime/onert/backend/cpu/ops/PadLayer.cc
+++ b/runtime/onert/backend/cpu/ops/PadLayer.cc
@@ -33,33 +33,40 @@ PadLayer::PadLayer()
// DO NOTHING
}
-void PadLayer::padFloat32()
+template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), _constantValueData.f);
+ nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()), constant_value_data);
}
-void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
- const int32_t *padData, int32_t padRank, uint8_t *constantValueData)
+ const int32_t *padData, int32_t padRank, const void *constantValueData)
{
_input = input;
_output = output;
memcpy(_padData, padData, sizeof(_padData));
_padRank = padRank;
- _constantValueData.u8 = constantValueData;
+ _constantValueData.v = constantValueData;
}
void PadLayer::run()
{
if (_input->data_type() == OperandType::FLOAT32)
{
- padFloat32();
+ padImpl<float>(_constantValueData.f);
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- padQuant8();
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
}
else
{
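
In the quantized branch above, when no constant value is supplied the pad value falls back to the output tensor's data_offset (its zero point), which is the value that represents real 0.0 under asymmetric quantization. A minimal standalone sketch of that selection rule (selectPadValue is an illustrative name, not an onert API):

#include <cstdint>
#include <iostream>

// Illustrative only: choose the pad value for a QUANT_UINT8_ASYMM pad. Use the
// supplied constant if present, otherwise the output zero point, which encodes
// real 0.0 under asymmetric quantization.
uint8_t selectPadValue(const uint8_t *constant_value, int32_t output_zero_point)
{
  if (constant_value != nullptr)
    return *constant_value;
  return static_cast<uint8_t>(output_zero_point);
}

int main()
{
  const uint8_t explicit_value = 7;
  std::cout << int(selectPadValue(&explicit_value, 128)) << '\n'; // 7
  std::cout << int(selectPadValue(nullptr, 128)) << '\n';         // 128
}
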
diff --git a/runtime/onert/backend/cpu/ops/PadLayer.h b/runtime/onert/backend/cpu/ops/PadLayer.h
index 85bd2e6f0..efd73d5e5 100644
--- a/runtime/onert/backend/cpu/ops/PadLayer.h
+++ b/runtime/onert/backend/cpu/ops/PadLayer.h
@@ -39,12 +39,10 @@ public:
PadLayer();
public:
- void padFloat32();
-
- void padQuant8();
+ template <typename T> void padImpl(const T *constant_value_data);
void configure(const IPortableTensor *input, IPortableTensor *output, const int32_t *padData,
- int32_t padRank, uint8_t *constantValueData = nullptr);
+ int32_t padRank, const void *constantValueData = nullptr);
void run() override;
@@ -54,7 +52,7 @@ private:
int32_t _padData[8];
int32_t _padRank;
- DataPtr _constantValueData;
+ ConstDataPtr _constantValueData;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
new file mode 100644
index 000000000..45fc148bf
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Quantize.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
+{
+ nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
+ _output->data_scale(), _output->data_offset());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void QuantizeLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ affineQuantize<float, uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
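
The float-to-uint8 path in QuantizeLayer applies the standard affine quantization q = clamp(round(x / scale) + zero_point, 0, 255), using the output tensor's scale and offset. A self-contained sketch of that formula (affineQuantize here is illustrative, not the cker signature):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Standard affine quantization: q = clamp(round(x / scale) + zero_point, 0, 255).
uint8_t affineQuantize(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // With scale 0.5 and zero_point 10: 2.0f -> 14, -1.0f -> 8.
  std::cout << int(affineQuantize(2.0f, 0.5f, 10)) << '\n';
  std::cout << int(affineQuantize(-1.0f, 0.5f, 10)) << '\n';
}
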
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
new file mode 100644
index 000000000..b4e7aca40
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer();
+
+public:
+ template <typename InputT, typename OutputT> void affineQuantize();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
new file mode 100644
index 000000000..26eb35e0d
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6Layer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/ReLU6.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void ReLU6Layer::relu6Float32()
+{
+ nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void ReLU6Layer::relu6Quant8()
+{
+ // cker quant8 relu is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void ReLU6Layer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ relu6Float32();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ relu6Quant8();
+ }
+ else
+ {
+ throw std::runtime_error{"ReLU6: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ReLU6Layer.h
new file mode 100644
index 000000000..994d17a30
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ReLU6Layer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ReLU6Layer : public ::onert::exec::IFunction
+{
+public:
+ ReLU6Layer();
+
+public:
+ void relu6Float32();
+
+ void relu6Quant8();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index 1dad031aa..fe22dbed7 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -116,6 +116,39 @@ void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+
+void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel)
+{
+ const bool same_scale = (input->data_scale() == output->data_scale() &&
+ input->data_offset() == output->data_offset());
+
+ reduce_kernel.prepare(input->num_dimensions(), axes.size());
+
+ if (!same_scale)
+ {
+ std::vector<int32_t> temp_sum(output->getShape().num_elements());
+ bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
+ reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
+ input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
+ output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
+ temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
+
+ if (!result)
+ {
+ throw std::runtime_error{"Reduce: Fail to run"};
+ }
+
+ return;
+ }
+
+ evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+}
+
} // namespace
ReduceLayer::ReduceLayer()
@@ -143,6 +176,11 @@ void ReduceLayer::run()
switch (_reduceType)
{
case ReduceType::kSum:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ return;
+ }
evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
break;
case ReduceType::kProd:
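
evalSumQuantized above only takes the rescaling path when the input and output quantization parameters differ; QuantizedMeanOrSum accumulates into the int32 temp_sum buffer and then requantizes into the output's scale and offset. A hedged standalone sketch of that requantization step alone (the usual affine conversion; requantizeSum is an illustrative helper, not cker code):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative only: requantize an int32 accumulator (sum of n uint8 inputs) into the
// output's parameters. The real value of the sum is in_scale * (acc - n * in_zero);
// the output stores round(real / out_scale) + out_zero, clamped to uint8.
uint8_t requantizeSum(int32_t acc, int n, float in_scale, int32_t in_zero, float out_scale,
                      int32_t out_zero)
{
  const float real = in_scale * static_cast<float>(acc - n * in_zero);
  const int32_t q = static_cast<int32_t>(std::round(real / out_scale)) + out_zero;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // Inputs {12, 14, 16} with in_scale 0.5, in_zero 10 represent {1.0, 2.0, 3.0};
  // their sum 6.0 with out_scale 1.0, out_zero 0 quantizes to 6.
  std::cout << int(requantizeSum(12 + 14 + 16, 3, 0.5f, 10, 1.0f, 0)) << '\n';
}
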
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
new file mode 100644
index 000000000..180094bb8
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OperationUtils.h"
+#include "ResizeBilinearLayer.h"
+#include "cker/operation/ResizeBilinear.h"
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ResizeBilinearLayer::ResizeBilinearLayer()
+ : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false),
+ _half_pixel_centers(false)
+{
+ // DO NOTHING
+}
+
+void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ int32_t output_height, int32_t output_width, bool align_corners,
+ bool half_pixel_centers)
+{
+ _input = input;
+ _output = output;
+ _output_height = output_height;
+ _output_width = output_width;
+ _align_corners = align_corners;
+ _half_pixel_centers = half_pixel_centers;
+}
+
+void ResizeBilinearLayer::run()
+{
+ nnfw::cker::ResizeBilinearParams params;
+ params.align_corners = _align_corners;
+ params.half_pixel_centers = _half_pixel_centers;
+ params.output_height = _output_height;
+ params.output_width = _output_width;
+
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ break;
+
+ case OperandType::UINT8:
+ case OperandType::BOOL8:
+ case OperandType::FLOAT16:
+ case OperandType::INT32:
+ case OperandType::INT64:
+ case OperandType::QUANT_INT8_SYMM:
+ std::runtime_error("ResizeBilinear NYI");
+ break;
+ default:
+ std::runtime_error("ResizeBilinear unsupported data type");
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
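
The align_corners and half_pixel_centers flags choose how each output pixel index is mapped back to a source coordinate before the bilinear blend. The mappings below follow the commonly used TFLite-style conventions; treat them as an assumption about what the flags select here rather than a statement about cker's exact implementation:

#include <iostream>

// Map an output coordinate to a source coordinate for bilinear resize (assumed conventions):
//   align_corners:       src = dst * (in_size - 1) / (out_size - 1)
//   half_pixel_centers:  src = (dst + 0.5) * in_size / out_size - 0.5
//   default:             src = dst * in_size / out_size
float sourceCoord(int dst, int in_size, int out_size, bool align_corners, bool half_pixel_centers)
{
  const float scale = (align_corners && out_size > 1)
                          ? static_cast<float>(in_size - 1) / (out_size - 1)
                          : static_cast<float>(in_size) / out_size;
  if (half_pixel_centers)
    return (dst + 0.5f) * scale - 0.5f;
  return dst * scale;
}

int main()
{
  // Upscaling 2 -> 4: output pixel 3 maps to 1.0 with align_corners, 1.25 with half-pixel centers.
  std::cout << sourceCoord(3, 2, 4, true, false) << '\n';
  std::cout << sourceCoord(3, 2, 4, false, true) << '\n';
}
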
diff --git a/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
new file mode 100644
index 000000000..fc49b348e
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ResizeBilinearLayer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+#define __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ResizeBilinearLayer : public ::onert::exec::IFunction
+{
+public:
+ ResizeBilinearLayer();
+
+public:
+  void configure(const IPortableTensor *input, IPortableTensor *output, int32_t output_height,
+ int32_t output_width, bool align_corners, bool half_pixel_centers);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_height;
+ int32_t _output_width;
+ bool _align_corners;
+ bool _half_pixel_centers;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
diff --git a/runtime/onert/backend/cpu/ops/SliceLayer.cc b/runtime/onert/backend/cpu/ops/SliceLayer.cc
index a9106c1a2..449c073e6 100644
--- a/runtime/onert/backend/cpu/ops/SliceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SliceLayer.cc
@@ -46,7 +46,7 @@ void SliceLayer::GetBeginAndSizeVectors(int dimensions, const IPortableTensor *b
}
}
-void SliceLayer::sliceFloat32()
+template <typename T> void SliceLayer::sliceImpl()
{
const int kMaxDim = nnfw::cker::Shape::kMaxSmallSize;
@@ -74,14 +74,8 @@ void SliceLayer::sliceFloat32()
}
nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SliceLayer::sliceQuant8()
-{
- // cker quant8 slice is not implemented yet
- throw std::runtime_error{"NYI"};
+ reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<T *>(_output->buffer()));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
@@ -97,11 +91,11 @@ void SliceLayer::run()
{
if (_input->data_type() == OperandType::FLOAT32)
{
- sliceFloat32();
+ sliceImpl<float>();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- sliceQuant8();
+ sliceImpl<uint8_t>();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/SliceLayer.h b/runtime/onert/backend/cpu/ops/SliceLayer.h
index 9945d7ee6..650e2c97a 100644
--- a/runtime/onert/backend/cpu/ops/SliceLayer.h
+++ b/runtime/onert/backend/cpu/ops/SliceLayer.h
@@ -42,8 +42,7 @@ public:
void run() override;
private:
- void sliceFloat32();
- void sliceQuant8();
+ template <typename T> void sliceImpl();
template <typename T>
void GetBeginAndSizeVectors(int dimensions, const IPortableTensor *begin,
diff --git a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc
new file mode 100644
index 000000000..a0869aed8
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepthLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SpaceToDepth.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+SpaceToDepthLayer::SpaceToDepthLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void SpaceToDepthLayer::spaceToDepth()
+{
+
+ nnfw::cker::SpaceToDepthParams params;
+ params.block_size = _block_size;
+
+ nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()));
+}
+
+void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void SpaceToDepthLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ spaceToDepth<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ spaceToDepth<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SpaceToDepth: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
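
SpaceToDepth folds each block_size x block_size spatial patch of an NHWC tensor into the channel dimension, so a [N, H, W, C] input with block size b becomes [N, H/b, W/b, C*b*b]. A small standalone helper illustrating just the shape rule (spaceToDepthShape is an illustrative name, not an onert API):

#include <array>
#include <iostream>

// Illustrative only: output shape of SpaceToDepth on an NHWC tensor with block size b.
// Each b x b spatial patch is folded into the channel dimension.
std::array<int, 4> spaceToDepthShape(const std::array<int, 4> &nhwc, int b)
{
  return {nhwc[0], nhwc[1] / b, nhwc[2] / b, nhwc[3] * b * b};
}

int main()
{
  const auto out = spaceToDepthShape({1, 4, 4, 1}, 2);
  std::cout << out[0] << 'x' << out[1] << 'x' << out[2] << 'x' << out[3] << '\n'; // 1x2x2x4
}
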
diff --git a/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h
new file mode 100644
index 000000000..c11ef2b0a
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/SpaceToDepthLayer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class SpaceToDepthLayer : public ::onert::exec::IFunction
+{
+public:
+ SpaceToDepthLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void spaceToDepth();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SplitVLayer.cc b/runtime/onert/backend/cpu/ops/SplitVLayer.cc
new file mode 100644
index 000000000..d6ca12442
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/SplitVLayer.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitVLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SplitV.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+SplitVLayer::SplitVLayer()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+{
+ // DO NOTHING
+}
+
+template <typename T> void SplitVLayer::splitV(void)
+{
+ nnfw::cker::SplitVParams op_params;
+ op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.num_split = _num_splits;
+
+ std::vector<T *> outputPtrs;
+ std::vector<nnfw::cker::Shape> outshape;
+
+ for (const auto output : _outputs)
+ {
+ assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
+ outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outshape.emplace_back(getTensorShape(output));
+ }
+
+ assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
+ nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
+ outshape, outputPtrs.data());
+}
+
+void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+ const IPortableTensor *split_dim, uint16_t num_splits,
+ std::vector<IPortableTensor *> &outputs)
+{
+ assert(input != nullptr);
+
+ _num_splits = num_splits;
+ _size_splits = size_splits;
+ _input = input;
+ _split_dim = split_dim;
+ _outputs = outputs;
+}
+
+void SplitVLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ splitV<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ splitV<uint8_t>();
+ }
+ else if (_input->data_type() == OperandType::INT32)
+ {
+ splitV<int32_t>();
+ }
+ else if (_input->data_type() == OperandType::INT64)
+ {
+ splitV<int64_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SplitV: unsupported input type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
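
SplitV slices the input along the axis stored in _split_dim into _num_splits outputs whose extents along that axis are given by _size_splits; all other dimensions are unchanged. A standalone sketch of the resulting output shapes (splitVShapes is illustrative only):

#include <iostream>
#include <vector>

// Illustrative only: compute each output shape of SplitV. Every output copies the input
// shape except along the split axis, whose extent comes from size_splits.
std::vector<std::vector<int>> splitVShapes(const std::vector<int> &in_shape, int axis,
                                           const std::vector<int> &size_splits)
{
  std::vector<std::vector<int>> out;
  for (int size : size_splits)
  {
    std::vector<int> shape = in_shape;
    shape[axis] = size;
    out.push_back(shape);
  }
  return out;
}

int main()
{
  // A [2, 6] input split along axis 1 with sizes {1, 2, 3} yields [2, 1], [2, 2], [2, 3].
  for (const auto &s : splitVShapes({2, 6}, 1, {1, 2, 3}))
    std::cout << '[' << s[0] << ", " << s[1] << "]\n";
}
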
diff --git a/runtime/onert/backend/cpu/ops/SplitVLayer.h b/runtime/onert/backend/cpu/ops/SplitVLayer.h
new file mode 100644
index 000000000..98f2f4406
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/SplitVLayer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class SplitVLayer : public ::onert::exec::IFunction
+{
+public:
+ SplitVLayer();
+
+public:
+ template <typename T> void splitV(void);
+
+  void configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+                 const IPortableTensor *split_dim, uint16_t num_splits,
+                 std::vector<IPortableTensor *> &outputs);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_size_splits;
+ const IPortableTensor *_split_dim;
+ uint16_t _num_splits;
+ std::vector<IPortableTensor *> _outputs;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc
new file mode 100644
index 000000000..b8dfcb4b5
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StatelessRandomUniformLayer.h"
+
+#include <cker/operation/StatelessRandomUniform.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+StatelessRandomUniformLayer::StatelessRandomUniformLayer()
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void StatelessRandomUniformLayer::configure(const IPortableTensor *shape,
+ const IPortableTensor *seed, IPortableTensor *output)
+{
+ _shape = shape;
+ _seed = seed;
+ _output = output;
+}
+
+void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
+{
+ nnfw::cker::StatelessRandomUniform(
+ getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
+ getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void StatelessRandomUniformLayer::run()
+{
+ switch (_output->data_type())
+ {
+    // TODO: Also support INT8 and UINT8 once quantization is applied.
+ case OperandType::FLOAT32:
+ StatelessRandomUniformFloat32();
+ break;
+ default:
+ throw std::runtime_error{"StatelessRandomUniformLayer: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h
new file mode 100644
index 000000000..ef11d623d
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/StatelessRandomUniformLayer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+#define __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class StatelessRandomUniformLayer : public ::onert::exec::IFunction
+{
+public:
+ StatelessRandomUniformLayer();
+
+public:
+ void configure(const IPortableTensor *shape, const IPortableTensor *seed,
+ IPortableTensor *output);
+
+ void StatelessRandomUniformFloat32();
+
+ void run() override;
+
+private:
+ const IPortableTensor *_shape;
+ const IPortableTensor *_seed;
+
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__