diff options
Diffstat (limited to 'runtime/onert/backend/acl_neon/KernelGenerator.cc')
-rw-r--r-- | runtime/onert/backend/acl_neon/KernelGenerator.cc | 856 |
1 files changed, 294 insertions, 562 deletions
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index e47186754..1195b83cc 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -31,6 +31,7 @@ #include "exec/NopFunction.h" #include "util/logging.h" #include "util/Utils.h" +#include "AclKernelGen.h" namespace onert { @@ -74,15 +75,15 @@ void KernelGenerator::visit(const ir::operation::Abs &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -96,10 +97,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; - auto backend_layout = ifm_alloc->layout(); + auto backend_layout = ifm_tensor->layout(); int axis_value = node.param().axis; if (axis_value < 0) @@ -112,7 +113,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(), + fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), arm_compute::ReductionOperation::ARG_IDX_MAX); auto acl_fn = asAclFunction(std::move(fn)); @@ -127,15 +128,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); assert(_ctx.at(block_size_index).data()); auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -147,15 +148,26 @@ void KernelGenerator::visit(const ir::operation::Cast &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::NECast>(); + std::unique_ptr<::arm_compute::IFunction> fn; + if (ifm_tensor->data_type() == ofm_tensor->data_type()) + { + auto l = std::make_unique<::arm_compute::NECopy>(); - auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8 - ? arm_compute::SubDataType::BOOL - : arm_compute::SubDataType::NONE; - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + + fn = std::move(l); + } + else + { + auto l = std::make_unique<::arm_compute::NECast>(); + + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + } auto acl_fn = asAclFunction(std::move(fn)); @@ -183,10 +195,10 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -194,8 +206,9 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), - conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), + ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -208,12 +221,12 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>(); + auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), block_size); + fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); auto acl_fn = asAclFunction(std::move(fn)); @@ -242,10 +255,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); @@ -253,8 +266,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) { auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), - ofm_alloc->handle(), conv_info, multiplier, act_info); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), + ofm_tensor->handle(), conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -265,12 +278,12 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -279,88 +292,28 @@ void KernelGenerator::visit(const ir::operation::Dequantize &node) void KernelGenerator::visit(const ir::operation::MaxPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, - ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::AvgPool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Concat &node) @@ -383,7 +336,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_alloc = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_builder->at(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); @@ -392,7 +345,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) if (input_indexes.size() < 2) { auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(input_tensors.at(0), output_alloc->handle()); + l->configure(input_tensors.at(0), output_tensor->handle()); fn = std::move(l); } else @@ -400,10 +353,10 @@ void KernelGenerator::visit(const ir::operation::Concat &node) auto l = std::make_unique<::arm_compute::NEConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_alloc->handle(), fixed_axis); + l->configure(input_tensors, output_tensor->handle(), fixed_axis); fn = std::move(l); } @@ -418,13 +371,13 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>(); - fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); + fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -436,12 +389,12 @@ void KernelGenerator::visit(const ir::operation::Floor &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NEFloor>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -450,76 +403,15 @@ void KernelGenerator::visit(const ir::operation::Floor &node) void KernelGenerator::visit(const ir::operation::FullyConnected &node) { - using ir::operation::FullyConnected; - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; - const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; - const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; - - const auto input_rank = _ctx.at(input_index).shape().rank(); - - const auto output_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1); - UNUSED_RELEASE(output_size); - assert(_ctx.at(bias_index).shape().dim(0) == output_size); - assert(_ctx.at(weight_index).shape().dim(0) == output_size); - const auto batch_size = - _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2); - const auto input_size = - _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1); - - // Check for reshaping input's shape into rank-2 - bool needs_reshape = false; - ir::Shape reshape(2); - if (input_rank == 3 || input_rank == 4) - { - const auto &ifm_shape = _ctx.at(input_index).shape(); - auto feature_size = 1; - for (int i = 0; i < ifm_shape.rank(); ++i) - { - feature_size *= ifm_shape.dim(i); - } - - UNUSED_RELEASE(feature_size); - assert(feature_size == batch_size * input_size); - - // for reshaping - needs_reshape = true; - reshape.dim(0) = batch_size; /* H */ - reshape.dim(1) = input_size; /* W */ - } - + auto output_tensor = _tensor_builder->at(output_index).get(); const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - const auto input_alloc = _tensor_builder->at(input_index).get(); - const auto weight_alloc = _tensor_builder->at(weight_index).get(); - const auto bias_alloc = _tensor_builder->at(bias_index).get(); - const auto frontend_layout = _current_op_seq_layout; - const auto acl_layout = output_alloc->handle()->info()->data_layout(); - - auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type = - arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL; - if (_ctx.at(weight_index).isConstant()) - { - kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS; - assert(_ctx.at(weight_index).data()); - } - - fn->configure( - input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(), - needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); - + auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, + ::arm_compute::NEFullyConnectedReshapingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), - ActivationBuilder::generate(activation, output_alloc->handle())); + std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } void KernelGenerator::visit(const ir::operation::HashtableLookup &node) @@ -531,17 +423,17 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hits_alloc = _tensor_builder->at(hits_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hits_tensor = _tensor_builder->at(hits_index).get(); - auto lookups_alloc = _tensor_builder->at(lookups_index).get(); - auto keys_alloc = _tensor_builder->at(keys_index).get(); - auto values_alloc = _tensor_builder->at(values_index).get(); + auto lookups_tensor = _tensor_builder->at(lookups_index).get(); + auto keys_tensor = _tensor_builder->at(keys_index).get(); + auto values_tensor = _tensor_builder->at(values_index).get(); auto fn = std::make_unique<::arm_compute::NEHashtableLookup>(); - fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), - output_alloc->handle(), hits_alloc->handle()); + fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -561,10 +453,10 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // Converting in reverse order const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto indices_alloc = _tensor_builder->at(indices_index).get(); - const auto backend_layout = ofm_alloc->layout(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto indices_tensor = _tensor_builder->at(indices_index).get(); + const auto backend_layout = ofm_tensor->layout(); UNUSED_RELEASE(backend_layout); // NOTE The frontend layout and backend layout must be the same for this operation. @@ -575,35 +467,35 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // a model. For example, if a model in NHWC has this operation as output rank == 4, indices // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. - assert(backend_layout == ifm_alloc->layout()); - assert(backend_layout == indices_alloc->layout()); + assert(backend_layout == ifm_tensor->layout()); + assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); auto fn = std::make_unique<::arm_compute::NEGatherEx>(); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; - assert(n == ifm_alloc->num_dimensions()); + assert(n == ifm_tensor->num_dimensions()); size_t k = _ctx.at(indices_index).shape().rank(); - assert(k == indices_alloc->num_dimensions()); + assert(k == indices_tensor->num_dimensions()); // Disable applied dim_correction - if (n != ifm_alloc->info()->num_dimensions()) + if (n != ifm_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction const auto ifm = _ctx.at(ifm_index); - ifm_alloc->info()->set_tensor_shape( + ifm_tensor->info()->set_tensor_shape( acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false)); } - if (k != indices_alloc->info()->num_dimensions()) + if (k != indices_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and indices tensor is applied dim_correction const auto indices = _ctx.at(indices_index); - indices_alloc->info()->set_tensor_shape( + indices_tensor->info()->set_tensor_shape( acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would // use arm_compute::TensorInfo::offset_element_in_bytes() @@ -621,20 +513,20 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node) const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto gamma_alloc = _tensor_builder->at(gamma_index).get(); - auto beta_alloc = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto gamma_tensor = _tensor_builder->at(gamma_index).get(); + auto beta_tensor = _tensor_builder->at(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(), - beta_alloc->handle(), epsilon); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), + beta_tensor->handle(), epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::L2Normalization &node) @@ -656,15 +548,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -673,32 +565,15 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) void KernelGenerator::visit(const ir::operation::L2Pool2D &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)}; - - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( + node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - uint32_t kw = node.param().kw; - uint32_t kh = node.param().kh; - const auto stride = node.param().stride; - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); const auto activation = node.param().activation; - - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, - ::onert::backend::acl_common::asPadStrideInfo(padding, stride)}; - - auto fn = std::make_unique<::arm_compute::NEPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -712,15 +587,15 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -733,13 +608,13 @@ void KernelGenerator::visit(const ir::operation::LogicalAnd &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -751,12 +626,12 @@ void KernelGenerator::visit(const ir::operation::LogicalNot &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -769,13 +644,13 @@ void KernelGenerator::visit(const ir::operation::LogicalOr &node) const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -787,8 +662,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; @@ -798,7 +673,7 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -807,159 +682,8 @@ void KernelGenerator::visit(const ir::operation::Logistic &node) void KernelGenerator::visit(const ir::operation::LSTM &node) { - // TODO Support dynamic rnn - // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. - const auto scratch_buffer_index{ - node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; - const auto output_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; - const auto cell_state_out_index{ - node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; - const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)}; - - const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)}; - const auto input_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional - const auto input_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)}; - const auto input_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)}; - const auto input_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto recurrent_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional - const auto recurrent_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)}; - const auto recurrent_to_cell_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)}; - const auto recurrent_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto cell_to_input_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional - const auto cell_to_forget_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional - const auto cell_to_output_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional - const auto input_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; - const auto forget_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)}; - const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)}; - const auto output_gate_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)}; - const auto projection_weights_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional - const auto projection_bias_index{ - node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional - const auto output_state_in_index{ - node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)}; - const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)}; - const auto cell_threshold = node.param().cell_threshold; - const auto projection_threshold = node.param().projection_threshold; - - bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; - bool has_recurrent_to_input_weights = - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; - bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0; - bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); - - // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. - // true: no CIFG - // false: CIFG - // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). - bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; - - // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. - // But the cell_to_input_weights does not exist in regular CIFG although peephole. - // true: peephole - // false: no peephole - bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; - - // NOTE Although the projection weights has data the projection bias may not have data. - bool has_projection_param = has_projection_weights; - - const auto activation = node.param().activation; - const auto cell_clip = cell_threshold; - const auto projection_clip = projection_threshold; - assert(cell_clip >= 0.f && projection_clip >= 0.f); - - auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); - auto output_alloc = _tensor_builder->at(output_index).get(); - - auto input_alloc = _tensor_builder->at(input_index).get(); - - auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); - auto recurrent_to_forget_weights_alloc = - _tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get(); - auto recurrent_to_output_weights_alloc = - _tensor_builder->at(recurrent_to_output_weights_index).get(); - - auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); - - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<::arm_compute::NELSTMLayer>(); - - ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{}; - if (has_cifg_param) - { - auto input_to_input_weights_alloc = - _tensor_builder->at(input_to_input_weights_index).get(); // optional - auto recurrent_to_input_weights_alloc = - _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional - auto cell_to_input_weights_handle = - has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle() - : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional - lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), - recurrent_to_input_weights_alloc->handle(), - cell_to_input_weights_handle, input_gate_bias_alloc->handle()); - } - if (has_peephole_param) - { - auto cell_to_forget_weights_alloc = - _tensor_builder->at(cell_to_forget_weights_index).get(); // optional - auto cell_to_output_weights_alloc = - _tensor_builder->at(cell_to_output_weights_index).get(); // optional - lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), - cell_to_output_weights_alloc->handle()); - } - if (has_projection_param) - { - auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? _tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional - lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); - } - - fn->configure( - input_alloc->handle(), input_to_forget_weights_alloc->handle(), - input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), - recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), - recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), - cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), - cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), - output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), - lstm_params, act_info, cell_clip, projection_clip); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); } void KernelGenerator::visit(const ir::operation::Mul &node) @@ -970,18 +694,18 @@ void KernelGenerator::visit(const ir::operation::Mul &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Neg &node) @@ -989,12 +713,12 @@ void KernelGenerator::visit(const ir::operation::Neg &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NENegLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1030,12 +754,12 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_alloc = _tensor_builder->at(input_index); - assert(input_rank == input_alloc->num_dimensions()); - if (input_rank != input_alloc->info()->num_dimensions()) + const auto &input_tensor = _tensor_builder->at(input_index); + assert(input_rank == input_tensor->num_dimensions()); + if (input_rank != input_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - input_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + input_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1094,8 +818,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node) const auto ofm_idx{node.getOutputs().at(0)}; const auto ifm_idx{node.getInputs().at(0)}; const auto permute_type = node.getPermuteType(); - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto rank = _ctx.at(ofm_idx).shape().rank(); assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); @@ -1108,7 +832,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::NEPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1119,7 +843,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) auto l = std::make_unique<::arm_compute::NEPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); fn = std::move(l); } @@ -1127,7 +851,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) { auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); } @@ -1143,15 +867,15 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto alpha_tensor = _tensor_builder->at(alpha_index).get(); std::unique_ptr<::arm_compute::IFunction> fn; - auto l = std::make_unique<::arm_compute::NEPReLU>(); + auto l = std::make_unique<::arm_compute::NEPReluLayer>(); - l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); @@ -1166,14 +890,14 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = input_alloc->layout(); + const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); const auto reduce_type = node.param().reduce_type; @@ -1182,11 +906,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<::arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - // NOTE NEReduceMean has a bug that does not support NHWC layout - // NEReduceMean intermediate tensors are always NCHW layout - auto l = std::make_unique<::arm_compute::NEReduceMeanEx>(); + auto l = std::make_unique<::arm_compute::NEReduceMean>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle()); + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); fn = std::move(l); } @@ -1194,7 +916,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) { auto l = std::make_unique<::arm_compute::NEReduceSum>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle()); + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); fn = std::move(l); } @@ -1202,7 +924,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) { auto l = std::make_unique<::arm_compute::NEReduceOperation>(); - l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle(), + l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), acl_common::convertReduceType(reduce_type)); fn = std::move(l); @@ -1218,15 +940,15 @@ void KernelGenerator::visit(const ir::operation::ReLU &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1238,15 +960,15 @@ void KernelGenerator::visit(const ir::operation::ReLU1 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1258,15 +980,15 @@ void KernelGenerator::visit(const ir::operation::ReLU6 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1278,13 +1000,13 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = output_alloc->layout(); + const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); UNUSED_RELEASE(frontend_layout); @@ -1292,7 +1014,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) auto fn = std::make_unique<arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1305,12 +1027,12 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NEScale>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); @@ -1334,25 +1056,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node) const auto activation = node.param().activation; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); - auto weights_alloc = _tensor_builder->at(weights_index).get(); - auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); - auto bias_alloc = _tensor_builder->at(bias_index).get(); - auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); + auto weights_tensor = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get(); auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); auto copy_layer = std::make_unique<::arm_compute::NECopy>(); - copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); + copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); _return_fn = asAclFunction(std::move(copy_layer)); - auto fn = std::make_unique<::arm_compute::NERNNLayerEx>( + auto fn = std::make_unique<::arm_compute::NERNNLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(), - bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(), - act_info); + fn->configure(input_tensor->handle(), weights_tensor->handle(), + recurrent_weights_tensor->handle(), bias_tensor->handle(), + hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -1361,12 +1083,12 @@ void KernelGenerator::visit(const ir::operation::RSQRT &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); auto fn = std::make_unique<::arm_compute::NERsqrtLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); _return_fn = asAclFunction(std::move(fn)); } @@ -1383,10 +1105,10 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) (void)dims; (void)ndim; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); _return_fn = std::move(acl_fn); } @@ -1396,15 +1118,15 @@ void KernelGenerator::visit(const ir::operation::Tanh &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<arm_compute::NEActivationLayer>(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1417,13 +1139,25 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)}; const auto beta = node.param().beta; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); + const auto frontend_layout = _current_op_seq_layout; + const auto backend_layout = input_tensor->layout(); + + // Disable applied dim_correction + const size_t input_rank = _ctx.at(input_index).shape().rank(); + if (input_rank != input_tensor->info()->num_dimensions()) + { + // This means that high dimension's value is 1 and input tensor is applied dim_correction + const auto input = _ctx.at(input_index); + input_tensor->info()->set_tensor_shape( + acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false)); + } auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>( _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_alloc->handle(), output_alloc->handle(), beta); + fn->configure(input_tensor->handle(), output_tensor->handle(), beta); auto acl_fn = asAclFunction(std::move(fn)); @@ -1438,20 +1172,18 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto block_size_alloc = _tensor_builder->at(block_size_index).get(); - auto paddings_alloc = _tensor_builder->at(paddings_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto paddings_tensor = _tensor_builder->at(paddings_index).get(); assert(_ctx.at(block_size_index).data()); assert(_ctx.at(paddings_index).data()); - // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is - // not 0. - auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>(); + auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>(); - fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(), - ofm_alloc->handle()); + fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), + ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1465,12 +1197,12 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) auto block_size = node.param().block_size; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>(); + auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>(); - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size); auto acl_fn = asAclFunction(std::move(fn)); @@ -1489,13 +1221,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - std::vector<arm_compute::ITensor *> output_allocs; + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ITensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); auto axis = node.param().axis; if (axis < 0) axis += ifm_rank; @@ -1503,7 +1235,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) auto fn = std::make_unique<::arm_compute::NESplit>(); - fn->configure(ifm_alloc->handle(), output_allocs, axis); + fn->configure(ifm_tensor->handle(), output_tensors, axis); _return_fn = asAclFunction(std::move(fn)); } @@ -1513,15 +1245,15 @@ void KernelGenerator::visit(const ir::operation::SQRT &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); const ::arm_compute::ActivationLayerInfo act_info{ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); auto acl_fn = asAclFunction(std::move(fn)); @@ -1534,13 +1266,13 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1555,17 +1287,17 @@ void KernelGenerator::visit(const ir::operation::Sub &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Slice &node) @@ -1575,10 +1307,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -1628,7 +1360,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto fn = std::make_unique<::arm_compute::NESlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set); + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); auto acl_fn = asAclFunction(std::move(fn)); @@ -1643,10 +1375,10 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto outputData_alloc = _tensor_builder->at(output_index).get(); - auto inputData_alloc = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_builder->at(output_index).get(); + auto inputData_tensor = _tensor_builder->at(input_index).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = inputData_alloc->layout(); + const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData int input_rank = _ctx.at(input_index).shape().rank(); @@ -1715,7 +1447,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto fn = std::make_unique<::arm_compute::NEStridedSlice>(); - fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set, + fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set, begin_mask, end_mask, shrink_axis_mask); auto acl_fn = asAclFunction(std::move(fn)); @@ -1749,16 +1481,16 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto ifm_alloc = _tensor_builder->at(ifm_index).get(); - auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ker_tensor = _tensor_builder->at(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>(); - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, - invalid_horizontal, invalid_vertical); + fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), + tconv_info, invalid_horizontal, invalid_vertical); auto acl_fn = asAclFunction(std::move(fn)); @@ -1771,10 +1503,10 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; const auto &perm{node.param().perm}; - auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); - const auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); + const auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = ifm_alloc->layout(); + const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); std::vector<std::int32_t> pv(perm.cbegin(), perm.cend()); @@ -1783,11 +1515,11 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2) + if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2) { auto l = std::make_unique<::arm_compute::NETranspose>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + l->configure(ifm_tensor->handle(), ofm_tensor->handle()); fn = std::move(l); } @@ -1795,7 +1527,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) { auto l = std::make_unique<::arm_compute::NEPermute>(); - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv); + l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv); fn = std::move(l); } @@ -1834,13 +1566,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_alloc = _tensor_builder->at(output_index); - orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape()); - assert(output_rank == output_alloc->num_dimensions()); - if (output_rank != output_alloc->info()->num_dimensions()) + const auto &output_tensor = _tensor_builder->at(output_index); + orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); + assert(output_rank == output_tensor->num_dimensions()); + if (output_rank != output_tensor->info()->num_dimensions()) { // This means that high dimension's value is 1 and ifm tensor is applied dim_correction - output_alloc->info()->set_tensor_shape(acl_common::asTensorShape( + output_tensor->info()->set_tensor_shape(acl_common::asTensorShape( _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false)); } } @@ -1858,17 +1590,17 @@ void KernelGenerator::visit(const ir::operation::Add &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Div &node) @@ -1879,16 +1611,16 @@ void KernelGenerator::visit(const ir::operation::Div &node) const auto activation = node.param().activation; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle())); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Exp &node) @@ -1896,12 +1628,12 @@ void KernelGenerator::visit(const ir::operation::Exp &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEExpLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1913,12 +1645,12 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input_alloc = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input_tensor = _tensor_builder->at(input_index).get(); auto fn = std::make_unique<::arm_compute::NEReshapeLayer>(); - fn->configure(input_alloc->handle(), output_alloc->handle()); + fn->configure(input_tensor->handle(), output_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1933,13 +1665,13 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto comparison_type = node.param().comparison_type; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto input0_alloc = _tensor_builder->at(input0_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); + auto output_tensor = _tensor_builder->at(output_index).get(); + auto input0_tensor = _tensor_builder->at(input0_index).get(); + auto input1_tensor = _tensor_builder->at(input1_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>(); - fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), (arm_compute::ComparisonOperation)comparison_type); auto acl_fn = asAclFunction(std::move(fn)); @@ -1953,13 +1685,13 @@ void KernelGenerator::visit(const ir::operation::Min &node) const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseMin>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); @@ -1972,13 +1704,13 @@ void KernelGenerator::visit(const ir::operation::Max &node) const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - auto ofm_alloc = _tensor_builder->at(ofm_index).get(); - auto lhs_alloc = _tensor_builder->at(lhs_index).get(); - auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_builder->at(ofm_index).get(); + auto lhs_tensor = _tensor_builder->at(lhs_index).get(); + auto rhs_tensor = _tensor_builder->at(rhs_index).get(); auto fn = std::make_unique<::arm_compute::NEElementwiseMax>(); - fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); auto acl_fn = asAclFunction(std::move(fn)); |