diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src')
164 files changed, 12047 insertions, 1703 deletions
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index 2488332a6..bed42529b 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file compilation.cc + * @brief This file contains ANeuralNetworksCompilation APIs and related classes + * @ingroup COM_AI_RUNTIME + */ + #include <NeuralNetworks.h> // For CLKernelLibraryEx initialization @@ -25,28 +31,42 @@ #include <arm_compute/runtime/CL/CLScheduler.h> #include <arm_compute/runtime/CL/CLSubTensor.h> #include <arm_compute/runtime/CL/functions/CLArithmeticAddition.h> -#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h> +#include <arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h> +#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h> #include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h> #include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h> #include <arm_compute/runtime/CL/functions/CLPoolingLayer.h> #include <arm_compute/runtime/CL/functions/CLActivationLayer.h> +#include <arm_compute/runtime/CL/functions/CLActivationLayerEx.h> #include <arm_compute/runtime/CL/functions/CLScale.h> +#include <arm_compute/runtime/CL/functions/CLSpaceToBatchND.h> +#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h> #include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/CL/functions/CLStridedSlice.h> +#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h> #include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> #include <arm_compute/runtime/CL/functions/CLGather.h> +#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> #include <arm_compute/runtime/CL/functions/CLTopKV2.h> -#include <arm_compute/runtime/CL/functions/CLReduceMax.h> +#include <arm_compute/runtime/CL/functions/CLArgMinMax.h> #include 
<arm_compute/runtime/CL/functions/CLCast.h> #include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> #include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h> #include <arm_compute/runtime/CL/functions/CLDequantizationLayer.h> -#include <arm_compute/runtime/CL/functions/CLReductionMean.h> -#include <arm_compute/runtime/CL/functions/CLTranspose.h> +#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h> +#include <arm_compute/runtime/CL/functions/CLPermuteEx.h> +#include <arm_compute/runtime/CL/functions/CLReduceOperation.h> #include <arm_compute/runtime/CL/functions/CLRNNLayer.h> #include <arm_compute/runtime/CL/functions/CLFloor.h> #include <arm_compute/runtime/CL/functions/CLCopy.h> -#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h> +#include <arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h> +#include <arm_compute/runtime/CL/functions/CLExp.h> +#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h> +#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> +#include <arm_compute/runtime/CL/functions/CLSquaredDifference.h> +#include <arm_compute/runtime/CL/functions/CLNeg.h> +#include <arm_compute/runtime/CL/functions/CLPReLU.h> +#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> +#include <arm_compute/runtime/CL/functions/CLComparisonOp.h> #include <arm_compute/runtime/SubTensor.h> #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> @@ -58,7 +78,7 @@ #include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h> #include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h> #include <arm_compute/runtime/NEON/functions/NEFloor.h> -#include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h> +#include <arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> #include "internal/arm_compute.h" @@ -74,19 +94,28 @@ #include "internal/arm_compute/tensor/View.h" #include 
"internal/layers/GenericReshapeLayer.h" #include "internal/layers/SimpleArithmeticAddition.h" +#include "internal/layers/SimplePadLayer.h" #include "internal/layers/SimpleCastLayer.h" +#include "internal/layers/SimpleTransposeConv.h" #include "internal/layers/GenericFullyConnectedLayer.h" -#include "internal/layers/PadLayer.h" #include "internal/layers/SimpleSpaceToDepth.h" #include "internal/layers/SimpleEmbeddingLookup.h" -#include "internal/layers/SquaredDifferenceOperation.h" - -#include "util/matrix/IndexIterator.h" -#include "util/kernel/IndexIterator.h" -#include "util/feature/IndexIterator.h" -#include "util/tensor/IndexIterator.h" - -#include <nnfw/std/memory.h> +#include "internal/layers/SimpleDepthToSpace.h" +#include "internal/layers/SimpleBatchToSpaceNd.h" +#include "internal/layers/SimpleHashtableLookupLayer.h" +#include "internal/layers/SimplePackLayer.h" +#include "internal/layers/SimpleSpaceToBatchND.h" +#include "internal/layers/SimpleNeg.h" +#include "internal/layers/SimpleUnpackLayer.h" +#include "internal/layers/SimpleSQRT.h" +#include "internal/layers/SimpleArgMinMax.h" + +#include "misc/matrix/IndexIterator.h" +#include "misc/kernel/IndexIterator.h" +#include "misc/feature/IndexIterator.h" +#include "misc/tensor/IndexIterator.h" + +#include <cpp14/memory.h> #include "compilation.h" #include "model.h" @@ -154,8 +183,7 @@ Padding valid_padding(void) return padding; } -Padding same_padding(const nnfw::util::feature::Shape &ifm_shape, - const nnfw::util::feature::Shape &ofm_shape, const Stride &stride, uint32_t kw, +Padding same_padding(const nnfw::misc::feature::Shape &ifm_shape, const Stride &stride, uint32_t kw, uint32_t kh) { Padding padding; @@ -164,13 +192,16 @@ Padding same_padding(const nnfw::util::feature::Shape &ifm_shape, // // SAME padding. Padding on both ends are the "same": // - // padding_to_beginning = total_padding / 2 - // padding_to_end = (total_padding + 1)/2. 
+ // padding_to_beginning = total_padding / 2 + // padding_to_end = (total_padding + 1)/2. // - const int32_t vertical_needed_input = (ofm_shape.H - 1) * stride.vertical + kh; + const int32_t out_size_height = (ifm_shape.H + stride.vertical - 1) / stride.vertical; + const int32_t out_size_width = (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; + + const int32_t vertical_needed_input = (out_size_height - 1) * stride.vertical + kh; const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H); - const int32_t horizontal_needed_input = (ofm_shape.W - 1) * stride.horizontal + kw; + const int32_t horizontal_needed_input = (out_size_width - 1) * stride.horizontal + kw; const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W); padding.top = vertical_total_padding / 2; @@ -208,14 +239,14 @@ using namespace std::placeholders; template <typename T> static void initFeatureTensor(::arm_compute::ITensor &tensor, - const nnfw::util::feature::Shape &feature_shape, + const nnfw::misc::feature::Shape &feature_shape, const uint8_t *feature_base, const size_t feature_size) { const ::internal::nnapi::feature::Reader<T> from{ feature_shape, reinterpret_cast<const T *>(feature_base), feature_size}; ::internal::arm_compute::feature::View<T> into{&tensor}; - ::nnfw::util::feature::iterate(feature_shape) + ::nnfw::misc::feature::iterate(feature_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -241,29 +272,29 @@ static void initVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_ template <typename T> static void initTensor3D(::arm_compute::ITensor &tensor, - const nnfw::util::tensor::Shape &tensor_shape, const uint8_t *tensor_base, + const nnfw::misc::tensor::Shape &tensor_shape, const uint8_t *tensor_base, const size_t tensor_size) { const ::internal::nnapi::tensor::Reader<T> from{ 
tensor_shape, reinterpret_cast<const T *>(tensor_base), tensor_size}; ::internal::arm_compute::tensor::View<T> into{&tensor}; - ::nnfw::util::tensor::iterate(tensor_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) { - ::nnfw::util::tensor::Index index_ACL = ::nnfw::util::tensor::copy_reverse(index_nnapi); + ::nnfw::misc::tensor::iterate(tensor_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) { + ::nnfw::misc::tensor::Index index_ACL = ::nnfw::misc::tensor::copy_reverse(index_nnapi); into.at(index_ACL) = from.at(index_nnapi); }; } template <typename T> static void initMatrixTensor(::arm_compute::ITensor &tensor, - const nnfw::util::matrix::Shape &matrix_shape, + const nnfw::misc::matrix::Shape &matrix_shape, const uint8_t *matrix_base, const size_t matrix_size) { const ::internal::nnapi::matrix::Reader<T> from{ matrix_shape, reinterpret_cast<const T *>(matrix_base), matrix_size}; ::internal::arm_compute::matrix::View<T> into{&tensor}; - ::nnfw::util::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) { + ::nnfw::misc::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) { const auto value = from.at(row, col); into.at(row, col) = value; }; @@ -288,34 +319,66 @@ static void initReorderVectorTensor(::arm_compute::ITensor &tensor, const uint8_ template <typename T> static void initKernelTensor(::arm_compute::ITensor &tensor, - const nnfw::util::kernel::Shape &kernel_shape, + const nnfw::misc::kernel::Shape &kernel_shape, const uint8_t *kernel_base, const size_t kernel_size) { const ::internal::nnapi::kernel::Reader<T> from{ kernel_shape, reinterpret_cast<const T *>(kernel_base), kernel_size}; ::internal::arm_compute::kernel::View<T> into{&tensor}; - ::nnfw::util::kernel::iterate(kernel_shape) + ::nnfw::misc::kernel::iterate(kernel_shape) << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(nth, ch, row, col); into.at(nth, ch, row, col) = value; }; } +/** + * @brief Structure to provide 
interface methods of compilation plan builder + */ struct IPlanBuilder { + /** + * @brief Destruct IPlanBuilder object using default destructor + */ virtual ~IPlanBuilder() = default; + /** + * @brief Add TensorInfo with Shape Constraints + * @param [in] ind Index of operand + * @param [in] info TensorInfo value to set to index of operand + * @return N/A + */ virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) = 0; + /** + * @brief Add Subsumption constraints + * @param [in] ind Index of operand + * @param [in] base Index of base operand of Subsumption + * @param [in] offset Offset of Subsumption + * @param [in] shape Shape of Subsumption + * @param [in] extend_parent extend_parent value of Subsumption + * @return N/A + */ virtual void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, const ::internal::tflite::operand::Index &base, const ::arm_compute::Coordinates &offset, const ::arm_compute::TensorShape &shape, bool extend_parent = false) = 0; + /** + * @brief Add Initializer lambda with ITensor param + * @param [in] ind Index of operand + * @param [in] initializer Initializer to add + * @return N/A + */ virtual void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) = 0; - virtual void addStage(const Stage &) = 0; + /** + * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params + * @param [in] stage Stage to add + * @return N/A + */ + virtual void addStage(const Stage &stage) = 0; }; // @@ -333,7 +396,6 @@ private: void appendReLU(::arm_compute::ITensor *tensor); void appendReLU6(::arm_compute::ITensor *tensor); void appendReLU1(::arm_compute::ITensor *tensor); - void appendTanh(::arm_compute::ITensor *tensor); public: void append(FuseCode code, ::arm_compute::ITensor *tensor); @@ -349,7 +411,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn 
= nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -357,7 +419,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -372,7 +434,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -380,7 +442,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -395,7 +457,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -403,7 +465,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -411,23 +473,6 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) } } -void ActivationBuilder::appendTanh(::arm_compute::ITensor *ifm_alloc) -{ - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 
1.0f}; - - if (::internal::arm_compute::isGpuMode()) - { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); - - _builder.append("Tanh", std::move(fn)); - } - else - throw std::runtime_error("Not supported, yet"); -} - void ActivationBuilder::append(FuseCode code, ::arm_compute::ITensor *ifm_alloc) { switch (code) @@ -490,9 +535,11 @@ public: void visit(const ::internal::tflite::op::Softmax::Node &node) override; void visit(const ::internal::tflite::op::StridedSlice::Node &node) override; void visit(const ::internal::tflite::op::ReduceMax::Node &node) override; + void visit(const ::internal::tflite::op::ReduceMin::Node &node) override; void visit(const ::internal::tflite::op::Cast::Node &node) override; void visit(const ::internal::tflite::op::TopKV2::Node &node) override; void visit(const ::internal::tflite::op::Gather::Node &node) override; + void visit(const ::internal::tflite::op::PReLU::Node &node) override; void visit(const ::internal::tflite::op::ReLU::Node &node) override; void visit(const ::internal::tflite::op::ReLU1::Node &node) override; void visit(const ::internal::tflite::op::ReLU6::Node &node) override; @@ -504,15 +551,33 @@ public: void visit(const ::internal::tflite::op::LSTM::Node &node) override; void visit(const ::internal::tflite::op::Floor::Node &node) override; void visit(const ::internal::tflite::op::Split::Node &node) override; + void visit(const ::internal::tflite::op::ArgMax::Node &node) override; void visit(const ::internal::tflite::op::RSQRT::Node &node) override; + void visit(const ::internal::tflite::op::SQRT::Node &node) override; void visit(const ::internal::tflite::op::Pad::Node &node) override; void visit(const ::internal::tflite::op::SpaceToDepth::Node &node) override; + void visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) override; + void visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) override; void visit(const 
::internal::tflite::op::L2Pool2D::Implicit::Node &node) override; void visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node) override; void visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) override; void visit(const ::internal::tflite::op::HashtableLookup::Node &node) override; void visit(const ::internal::tflite::op::L2Normalization::Node &node) override; void visit(const ::internal::tflite::op::SquaredDifference::Node &node) override; + void visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) override; + void visit(const ::internal::tflite::op::DepthToSpace::Node &node) override; + void visit(const ::internal::tflite::op::Unpack::Node &node) override; + void visit(const ::internal::tflite::op::Neg::Node &node) override; + void visit(const ::internal::tflite::op::Exp::Node &node) override; + void visit(const ::internal::tflite::op::ReduceSum::Node &node) override; + void visit(const ::internal::tflite::op::Equal::Node &node) override; + void visit(const ::internal::tflite::op::TransposeConv::Node &node) override; + void visit(const ::internal::tflite::op::Pack::Node &node) override; + void visit(const ::internal::tflite::op::Abs::Node &node) override; + void visit(const ::internal::tflite::op::NotEqual::Node &node) override; + void visit(const ::internal::tflite::op::LogicalAnd::Node &node) override; + void visit(const ::internal::tflite::op::LogicalNot::Node &node) override; + void visit(const ::internal::tflite::op::LogicalOr::Node &node) override; private: const ::internal::tflite::operand::Set &_ctx; @@ -582,7 +647,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node) // NOTE SimpleArithmeticAddition does not support broadcasting assert(lhs_shape == rhs_shape); - auto l = nnfw::make_unique<SimpleArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<SimpleArithmeticAddition>(); l->configure(lhs_alloc, rhs_alloc, ofm_alloc); @@ -592,7 +657,7 @@ void Planner::visit(const 
::internal::tflite::op::Add::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), @@ -602,7 +667,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node) } else // NEON { - auto l = nnfw::make_unique<::arm_compute::NEArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); @@ -672,7 +737,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLArithmeticSubtraction>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtractionEx>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), @@ -682,7 +747,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) } else // NEON { - auto fn = nnfw::make_unique<::arm_compute::NEArithmeticSubtraction>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) 
according to NN API specification fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); @@ -696,7 +761,6 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) _builder.addStage(stage); } -// TODO: test with scalar*scalar, tensor bigger than 3D (e.g., 4D) void Planner::visit(const ::internal::tflite::op::Mul::Node &node) { const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; @@ -754,7 +818,7 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseMultiplication>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>(); fn->configure(CAST_CL(lhs_input_alloc), CAST_CL(rhs_input_alloc), CAST_CL(output_alloc), 1.0, // scale @@ -765,9 +829,9 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node) } else // NEON { - auto fn = nnfw::make_unique<::arm_compute::NEPixelWiseMultiplication>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - fn->configure(CAST_NE(lhs_input_alloc), CAST_NE(rhs_input_alloc), CAST_NE(output_alloc), + fn->configure(lhs_input_alloc, rhs_input_alloc, output_alloc, 1.0, // scale arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); @@ -836,11 +900,11 @@ void Planner::visit(const ::internal::tflite::op::Div::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseDivision>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseDivision>(); - // TODO Decide scale, overflow_policy, and rounding_policy. - // Currently, the default values are used. 
- fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), + 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); builder.append("Div", std::move(fn)); } @@ -975,7 +1039,7 @@ void Planner::visit(const ::internal::tflite::op::Conv2D::Implicit::Node &node) param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H) + ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1255,7 +1319,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H) + ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H) : valid_padding(); param.multipler = multiplier; @@ -1293,7 +1357,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc), conv_info, param.multipler); @@ -1302,7 +1366,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod } else { - auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler); @@ -1436,7 +1500,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod if 
(::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc), conv_info, param.multipler); @@ -1445,7 +1509,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod } else { - auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler); @@ -1499,7 +1563,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node) if (from_env<bool>(std::getenv("USE_SIMPLE_CAST"))) { // Use the CPU version of CAST operation - auto l = nnfw::make_unique<SimpleCastLayer>(); + auto l = nnfw::cpp14::make_unique<SimpleCastLayer>(); l->configure(input_alloc, output_alloc); fn = std::move(l); @@ -1508,7 +1572,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLCast>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); fn = std::move(l); @@ -1554,10 +1618,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1586,7 +1652,7 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1652,7 +1718,6 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - // TODO 4D tensor (dim(0) !=1 ) const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); @@ -1669,10 +1734,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1782,10 +1849,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. // Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1814,7 +1883,7 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? 
same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1882,7 +1951,6 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - // TODO 4D tensor (dim(0) != 1) const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); @@ -1899,10 +1967,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. // Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1998,8 +2068,9 @@ void Planner::visit(const ::internal::tflite::op::Concat::Node &node) } // Set Shape Constraints and TensorInfo (for output) - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); // Set Shape Constraints and TensorInfo (for input) const uint32_t coord_index = ToARMComputeAxis(input_rank, axis).value(); @@ -2060,7 
+2131,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) internal::tflite::operand::Shape reshape(2); if (input_rank == 4) { - nnfw::util::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); + nnfw::misc::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); auto feature_size = ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W; assert(feature_size == batch_size * input_size); @@ -2078,7 +2149,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) else if (input_rank == 2) { auto ifm_shape = _ctx.at(input_index).shape(); - nnfw::util::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix(); + nnfw::misc::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix(); assert(ifm_shape.dim(0) == batch_size); assert(ifm_shape.dim(1) == input_size); @@ -2131,7 +2202,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) auto weight_alloc = ctx.at(::internal::tflite::operand::Index{param.weight_index}); auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index}); - auto fn = nnfw::make_unique<GenericFullyConnectedLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericFullyConnectedLayer>(); fn->configure(input_alloc, weight_alloc, bias_alloc, output_alloc, needs_reshape, asTensorShape(reshape)); @@ -2154,10 +2225,12 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node) // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -2181,7 +2254,7 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLScale>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), ::arm_compute::InterpolationPolicy::BILINEAR, @@ -2202,18 +2275,19 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) const ::internal::tflite::operand::Index output_index{node.param().output_index}; const ::internal::tflite::operand::Index input_index{node.param().input_index}; - // NOTE The content of a tensor specified by shape_index should be aligned with - // output tensor shape - // TODO Check consistency of ouput shape + auto input_shape = asTensorShape(_ctx.at(input_index).shape()); + auto output_shape = asTensorShape(_ctx.at(output_index).shape()); - // TODO Re-enable this assert - // assert((ifm_shape.C * ifm_shape.H * ifm_shape.W) == out_size); + assert(input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] == + output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]); // TODO Should move to the place where the operand is handled, if it is possible. 
- _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _builder.addShapeConstr(output_index, asTensorInfo(output_shape, _ctx.at(output_index).type(), + _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, asTensorInfo(input_shape, _ctx.at(input_index).type(), + _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); struct Param { @@ -2233,7 +2307,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) if (::internal::arm_compute::isGpuMode()) { // GenericReshape first apply NCHW->NHWC permutation, and apply reshape - auto fn = nnfw::make_unique<GenericReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); @@ -2241,7 +2315,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) } else { - auto fn = nnfw::make_unique<GenericReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>(); fn->configure(input_alloc, output_alloc); @@ -2259,19 +2333,15 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) const ::internal::tflite::operand::Index output_index{node.param().output_index}; const ::internal::tflite::operand::Index input_index{node.param().input_index}; - // Currently, 3D-input with dims is tested. Note that param(). dims_index_optional is optional. 
- // two generated test passed: - // - 3D input : squeeze_float_1 - // - 2D input : squeeze_3D_float_1 - // - 4D input fails (squeeze.mod.py) -> we need general tensor support - - // TODO Support generic tensor shape - // Set Shape Constraints - _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); // Construct operation parameters struct Param @@ -2291,7 +2361,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); @@ -2299,7 +2369,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReshapeLayer>(); fn->configure(input_alloc, output_alloc); @@ -2350,7 +2420,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLSoftmaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.scale); @@ -2358,7 +2428,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node 
&node) } else { - auto fn = nnfw::make_unique<::arm_compute::NESoftmaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>(); fn->configure(input_alloc, output_alloc, param.scale); @@ -2397,14 +2467,18 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node) assert(_ctx.at(startData_index).shape().rank() == 1); assert(_ctx.at(endData_index).shape().rank() == 1); assert(_ctx.at(stridesData_index).shape().rank() == 1); - _builder.addShapeConstr(startData_index, - asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), - _ctx.at(startData_index).type())); + _builder.addShapeConstr( + startData_index, + asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), _ctx.at(startData_index).type(), + _ctx.at(startData_index).scale(), _ctx.at(startData_index).zeroPoint())); _builder.addShapeConstr(endData_index, asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), - _ctx.at(endData_index).type())); - _builder.addShapeConstr(stridesData_index, - asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), - _ctx.at(stridesData_index).type())); + _ctx.at(endData_index).type(), + _ctx.at(endData_index).scale(), + _ctx.at(endData_index).zeroPoint())); + _builder.addShapeConstr( + stridesData_index, + asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), _ctx.at(stridesData_index).type(), + _ctx.at(stridesData_index).scale(), _ctx.at(stridesData_index).zeroPoint())); // Set initializers for indices data such as order of inputData { @@ -2469,7 +2543,7 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLStridedSlice>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSliceEx>(); fn->configure(CAST_CL(inputData_alloc), CAST_CL(outputData_alloc), CAST_CL(startData_alloc), CAST_CL(endData_alloc), CAST_CL(stridesData_alloc), param.beginMask, @@ -2484,6 +2558,133 @@ void Planner::visit(const 
::internal::tflite::op::StridedSlice::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node) +{ + VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); + assert(_ctx.at(axis_index).hasData()); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } + + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + std::set<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + std::set<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MIN); + + builder.append("ReduceMin", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) { VERBOSE(ReduceMax) << "Configure REDUCEMAX operation" << std::endl; @@ -2492,43 +2693,104 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; - // Handle special case only: - // Input: Matrix (rank 2) - // Output: Vector (rank 1) - // Axis: one element (scalar or rank 1 with 1 element), constant auto ifm_shape = _ctx.at(ifm_index).shape(); auto ofm_shape = _ctx.at(ofm_index).shape(); auto axis_shape = _ctx.at(axis_index).shape(); - assert(ofm_shape.rank() == 1); - assert(ifm_shape.rank() == 2); + 
assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); assert(_ctx.at(axis_index).hasData()); - assert(axis_shape.rank() == 0 || ((axis_shape.rank() == 1) && (axis_shape.dim(0) == 1))); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + std::set<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); - // Note: Assume only one element in axis. 
It is checked by assertion above - // TODO: handle general case - // Axis is integer value (generally, int32) - int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); - assert(axis_value == 1); + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } // Construct operation parameters struct Param { int ofm_index; int ifm_index; - - int32_t axis; + std::set<uint32_t> axis; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - param.axis = axis_value; + param.axis = axis; auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); @@ -2536,9 +2798,10 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReduceMax>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), param.axis, CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MAX); builder.append("ReduceMax", std::move(fn)); } @@ -2586,7 +2849,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node) if (from_env<bool>(std::getenv("USE_SIMPLE_CAST"))) { // Use the CPU version of CAST operation - auto l = nnfw::make_unique<SimpleCastLayer>(); + auto l = nnfw::cpp14::make_unique<SimpleCastLayer>(); l->configure(input_alloc, output_alloc); fn = std::move(l); @@ 
-2595,7 +2858,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLCast>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); fn = std::move(l); @@ -2627,13 +2890,18 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node) // Set shape constraints _builder.addShapeConstr(outputValues_index, asTensorInfo(asTensorShape(_ctx.at(outputValues_index).shape()), - _ctx.at(outputValues_index).type())); + _ctx.at(outputValues_index).type(), + _ctx.at(outputValues_index).scale(), + _ctx.at(outputValues_index).zeroPoint())); _builder.addShapeConstr(outputIndices_index, asTensorInfo(asTensorShape(_ctx.at(outputIndices_index).shape()), - _ctx.at(outputIndices_index).type())); - _builder.addShapeConstr(inputData_index, - asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), - _ctx.at(inputData_index).type())); + _ctx.at(outputIndices_index).type(), + _ctx.at(outputIndices_index).scale(), + _ctx.at(outputIndices_index).zeroPoint())); + _builder.addShapeConstr( + inputData_index, + asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), _ctx.at(inputData_index).type(), + _ctx.at(inputData_index).scale(), _ctx.at(inputData_index).zeroPoint())); // Construct operation parameters struct Param @@ -2659,7 +2927,7 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLTopKV2>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>(); fn->configure(CAST_CL(input_alloc), param.k, CAST_CL(values_alloc), CAST_CL(indices_alloc)); @@ -2686,12 +2954,15 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) assert(_ctx.at(rhs_index).shape().rank() == 1); // Set Shape Constraints - _builder.addShapeConstr(ofm_index, 
asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), - _ctx.at(lhs_index).type())); - _builder.addShapeConstr(rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), - _ctx.at(rhs_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(), + _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint())); + _builder.addShapeConstr( + rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(), + _ctx.at(rhs_index).scale(), _ctx.at(ofm_index).zeroPoint())); // Construct operation parameters struct Param @@ -2720,7 +2991,7 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) { std::unique_ptr<::arm_compute::IFunction> fn; - auto l = nnfw::make_unique<::arm_compute::CLGather>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLGather>(); l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); fn = std::move(l); builder.append("Gather", std::move(fn)); @@ -2732,6 +3003,62 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::PReLU::Node &node) +{ + VERBOSE(PReLU) << "Configure PReLU operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index}; + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + 
_ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + _builder.addShapeConstr(alpha_index, + asTensorInfo(asTensorShape(_ctx.at(alpha_index).shape()), + _ctx.at(alpha_index).type(), _ctx.at(alpha_index).scale(), + _ctx.at(alpha_index).zeroPoint())); + + struct Param + { + int ofm_index; + int ifm_index; + int alpha_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.alpha_index = alpha_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto alpha_alloc = ctx.at(::internal::tflite::operand::Index{param.alpha_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>(); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(alpha_alloc), CAST_CL(ofm_alloc)); + builder.append("PReLU", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) { VERBOSE(ReLU) << "Configure ReLU operation" << std::endl; @@ -2767,7 +3094,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2775,7 +3102,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) } else { - auto fn = 
nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2821,7 +3148,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2829,7 +3156,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2875,7 +3202,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2883,7 +3210,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2902,10 +3229,12 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + 
_ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); struct Param { @@ -2927,7 +3256,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2935,7 +3264,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2981,14 +3310,20 @@ void Planner::visit(const ::internal::tflite::op::Logistic::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); builder.append("Logistic", std::move(fn)); } else - throw std::runtime_error("Not supported, yet"); + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc, ofm_alloc, act_info); + + builder.append("Logistic", std::move(fn)); + } }; _builder.addStage(stage); @@ -3005,52 +3340,89 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) const ::internal::tflite::operand::Index keep_dims_index{node.param().keep_dims_index}; const int keep_dims = _ctx.at(keep_dims_index).asScalar<int>(); - // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, 
asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); - _builder.addShapeConstr(axis_index, asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()), - _ctx.at(axis_index).type())); + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); - // TODO keep_dims==0 - assert(keep_dims != 0); + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } - // Set axis - // TODO Other axis (Axis for width and height are currently supported.) - // TODO Other ranks (Rank 4 is currently supported.) - assert(_ctx.at(ifm_index).shape().rank() == 4); + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr(axis_index, + asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()), + _ctx.at(axis_index).type(), _ctx.at(axis_index).scale(), + _ctx.at(axis_index).zeroPoint())); - std::vector<uint32_t> axis; + std::set<uint32_t> axis; { - const auto axis_base = _ctx.at(axis_index).data().base(); - const auto axis_type = _ctx.at(axis_index).type(); - const auto axis_size = _ctx.at(axis_index).shape().asVector(); - - // NHWC type -> WHCN type - if (_ctx.at(ofm_index).shape().rank() == 4) + const auto ifm_rank = ifm_shape.rank(); + const auto axis_shape = _ctx.at(axis_index).shape(); + switch (axis_shape.rank()) { - for (uint32_t n = 0; n < axis_size; ++n) + case 0: // scalar { - const ::arm_compute::Coordinates 
coordinate{n}; - const int32_t *from = reinterpret_cast<const int32_t *>(axis_base) + n; - if (*from == 1) + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) { - axis.push_back(1); // h + axis_value += ifm_rank; } - else if (*from == 2) - { - axis.push_back(0); // w - } - else if (*from < 0) - { - // Nothing to do - } - else + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) { - throw std::runtime_error{"Not supported axis"}; + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); } + break; } + default: + throw std::runtime_error("Not supported"); + break; } } @@ -3058,7 +3430,7 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) { int ofm_index; int ifm_index; - std::vector<uint32_t> axis; + std::set<uint32_t> axis; }; Param param; @@ -3073,9 +3445,10 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReductionMean>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MEAN); builder.append("Mean", std::move(fn)); } @@ -3125,23 +3498,37 @@ void Planner::visit(const ::internal::tflite::op::RNN::Node &node) num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); // Set 
Shape Constraints and TensorInfo - _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); _builder.addShapeConstr(hidden_state_out_index, asTensorInfo(asTensorShape(_ctx.at(hidden_state_out_index).shape()), - _ctx.at(hidden_state_out_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _ctx.at(hidden_state_out_index).type(), + _ctx.at(hidden_state_out_index).scale(), + _ctx.at(hidden_state_out_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); _builder.addShapeConstr(weights_index, asTensorInfo(asTensorShape(_ctx.at(weights_index).shape()), - _ctx.at(weights_index).type())); + _ctx.at(weights_index).type(), + _ctx.at(weights_index).scale(), + _ctx.at(weights_index).zeroPoint())); _builder.addShapeConstr(recurrent_weights_index, asTensorInfo(asTensorShape(_ctx.at(recurrent_weights_index).shape()), - _ctx.at(recurrent_weights_index).type())); - _builder.addShapeConstr(bias_index, asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()), - _ctx.at(bias_index).type())); + _ctx.at(recurrent_weights_index).type(), + _ctx.at(recurrent_weights_index).scale(), + _ctx.at(recurrent_weights_index).zeroPoint())); + _builder.addShapeConstr(bias_index, + asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()), + _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(), + _ctx.at(bias_index).zeroPoint())); _builder.addShapeConstr(hidden_state_in_index, asTensorInfo(asTensorShape(_ctx.at(hidden_state_in_index).shape()), - 
_ctx.at(hidden_state_in_index).type())); + _ctx.at(hidden_state_in_index).type(), + _ctx.at(hidden_state_in_index).scale(), + _ctx.at(hidden_state_in_index).zeroPoint())); // Construct operation parameters struct Param @@ -3215,13 +3602,13 @@ void Planner::visit(const ::internal::tflite::op::LSTM::Node &node) void Planner::visit(const ::internal::tflite::op::Transpose::Node &node) { VERBOSE(Transpose) << "Configure Transpose operation" << std::endl; - // Transpose supports only height-wight dimention support. - // CLPermute can be used to implement generic transpose along any axis - // But CLPermute only implements [2,0,1], [1,2,0], [3,2,0,1] - // TODO Implement other permutation CLPermute function and provide generic transpose const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index permu_index{node.param().permu_index}; + + assert(_ctx.at(ifm_index).shape().rank() == _ctx.at(ofm_index).shape().rank()); + assert(_ctx.at(permu_index).hasData() == true); // Set shape constraints _builder.addShapeConstr( @@ -3230,30 +3617,41 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node) _builder.addShapeConstr( ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); - // NNAPI spec provides permutation vector for generic transpose - // TODO Make the permutation vector a part of Param + struct Param { int ofm_index; int ifm_index; + const int32_t *pv; + int rank; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); + param.pv = reinterpret_cast<const int32_t *>(_ctx.at(permu_index).data().base()); + param.rank = _ctx.at(ifm_index).shape().rank(); auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = 
ctx.at(::internal::tflite::operand::Index{param.ofm_index}); const auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); - // CLTranspose assumes only spatial transpose, will be replaced with CLPermute - // TODO Check the validity of permutation vector, then call CLPermute with permu vector - auto fn = nnfw::make_unique<::arm_compute::CLTranspose>(); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermuteEx>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), + getARMComputePermutationVector(param.rank, param.pv)); + + builder.append("Transpose", std::move(fn)); + } + else + { + throw std::runtime_error("Not supported, yet"); + } - builder.append("Transpose", std::move(fn)); }; _builder.addStage(stage); @@ -3267,10 +3665,12 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().input_index}; // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -3289,7 +3689,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLFloor>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); 
@@ -3297,7 +3697,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEFloor>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>(); fn->configure(ifm_alloc, ofm_alloc); @@ -3308,11 +3708,367 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::ArgMax::Node &node) +{ + VERBOSE(ArgMax) << "Configure ARGMAX operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + + assert(_ctx.at(axis_index).hasData()); + // Axis dimension is always 1. + assert(axis_shape.rank() == 1); + assert(ifm_shape.rank() == ofm_shape.rank()); + + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false), + _ctx.at(ofm_index).type())); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false), + _ctx.at(ifm_index).type())); + + std::vector<uint32_t> l_axis; + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + auto axis_base = _ctx.at(axis_index).data().base(); + auto axis_type = _ctx.at(axis_index).type(); + // TODO Should support axis size > 1. + assert(axis_size == 1); + // axis is tensor with 1 dimension - always a vector. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_shape.rank(); + } + l_axis.push_back(ToARMComputeAxis(ifm_shape.rank(), axis_value).value()); + } + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + std::vector<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = l_axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_ARGMINMAX"))) + { + // USE CPU VERSION OF ARGMAX + auto fn = nnfw::cpp14::make_unique<SimpleArgMinMax>(); + + fn->configure(ifm_alloc, ofm_alloc, param.axis, ::arm_compute::ArgOperation::MAX); + + builder.append("ArgMax", std::move(fn)); + } + else + { + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgMinMax>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ArgOperation::MAX); + + builder.append("ArgMax", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::SQRT::Node &node) +{ + VERBOSE(SQRT) << "Configure SQRT operation" << std::endl; + + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + + // Set shape constraints + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + 
_builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + struct Param + { + int output_index; + int input_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + + if (from_env<bool>(std::getenv("USE_SIMPLE_SQRT"))) + { + // USE CPU VERSION OF SQRT + auto fn = nnfw::cpp14::make_unique<SimpleSQRT>(); + + fn->configure(input_alloc, output_alloc); + + builder.append("SQRT", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); + + builder.append("SQRT", std::move(fn)); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(input_alloc, output_alloc, act_info); + + builder.append("SQRT", std::move(fn)); + } + } + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::RSQRT::Node &node) { VERBOSE(RSQRT) << "Configure Rsqrt operation" << std::endl; - throw std::runtime_error("Not supported, yet"); + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + + // Set shape constraints + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), 
_ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + struct Param + { + int output_index; + int input_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + const ::arm_compute::ActivationLayerInfoEx act_info{ + ::arm_compute::ActivationLayerInfoEx::ActivationFunction::RSQRT}; + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayerEx>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); + + builder.append("RSQRT", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Equal::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape 
&>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::ComparisonOperation::EQUAL); + + builder.append("Equal", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported, yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node) +{ + VERBOSE(TransposeConv) << "Configure TransposeConv operation" << std::endl; + + const ::internal::tflite::operand::Index op_shape_index{node.param().op_shape_index}; + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const 
::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index ker_index{node.param().ker_index}; + + const ::internal::tflite::operand::Index padding_index{node.param().padding_index}; + const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index}; + const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index}; + + // Only 4D tensors are supported + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(); + + assert(_ctx.at(padding_index).hasData() == true); + + const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>(); + const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>(); + + const PaddingCode padding_type = + static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>()); + + assert(vstride > 0); + assert(hstride > 0); + assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || + (ANEURALNETWORKS_PADDING_VALID == padding_type)); + assert(ifm_shape.N == ofm_shape.N); + assert(ifm_shape.C == ker_shape.C); + assert(ker_shape.N == ofm_shape.C); + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(), + 
_ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + int ker_index; + Padding padding; + Stride stride; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.ker_index = ker_index.asInt(); + + param.stride.horizontal = hstride; + param.stride.vertical = vstride; + + param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) + ? same_padding(ifm_shape, param.stride, ker_shape.W, ker_shape.H) + : valid_padding(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index}); + + auto fn = nnfw::cpp14::make_unique<SimpleTransposeConv>(); + + // Only rank 4 is supported + const int rank = 4; + + auto tconv_info = asPadStringInfo(param.padding, param.stride); + + fn->configure(ifm_alloc, ker_alloc, ofm_alloc, tconv_info, getARMComputeAxises(rank)); + + builder.append("TransposeConv", std::move(fn)); + }; + _builder.addStage(stage); } void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) @@ -3320,7 +4076,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index}; const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index}; - const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; // Set Shape Constraints and TensorInfo _builder.addShapeConstr( @@ -3349,8 +4104,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) int ofm_index; int lhs_index; int rhs_index; - - FuseCode 
activation; }; Param param; @@ -3359,8 +4112,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) param.lhs_index = lhs_index.asInt(); param.rhs_index = rhs_index.asInt(); - param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index}); @@ -3368,26 +4119,17 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - - // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSquaredDifference>(); + fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); builder.append("SquaredDifference", std::move(fn)); } - else // NEON + else { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - - // TODO Decide ConvertPolicy (WARP? SATURATE?) 
according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_ZERO); - - builder.append("SquaredDifference", std::move(fn)); + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); } - ActivationBuilder{builder}.append(param.activation, ofm_alloc); }; _builder.addStage(stage); @@ -3446,55 +4188,87 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const auto paddings_shape = _ctx.at(paddings_index).shape().asTensor(); + assert(_ctx.at(paddings_index).hasData() == true); // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(ifm_index, + asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false), + _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(), + _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr(ofm_index, + asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false), + _ctx.at(ofm_index).type(), _ctx.at(ofm_index).scale(), + _ctx.at(ofm_index).zeroPoint())); _builder.addShapeConstr( - ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), - _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); - _builder.addShapeConstr( - ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), - _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); - _builder.addShapeConstr( - paddings_index, - asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(), - _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint())); + paddings_index, 
asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape(), false), + _ctx.at(paddings_index).type(), _ctx.at(paddings_index).scale(), + _ctx.at(paddings_index).zeroPoint())); + + // initializer for padding + { + auto pad_type = _ctx.at(paddings_index).type(); + + if (pad_type == ANEURALNETWORKS_TENSOR_INT32) + { + auto pad_base = _ctx.at(paddings_index).data().base(); + auto pad_size = _ctx.at(paddings_index).data().size(); + auto pad_shape = _ctx.at(paddings_index).shape().asMatrix(); + + // Supported padding for height and width only. + auto initializer = std::bind(initMatrixTensor<int32_t>, _1, pad_shape, pad_base, pad_size); + _builder.addInitializer(paddings_index, initializer); + } + else + { + throw std::runtime_error("Only Int32 datatype is supported for Pad values"); + } + } // Construct operation parameters struct Param { int ofm_index; int ifm_index; - int32_t padding_size; + int padding_index; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - - assert(_ctx.at(paddings_index).hasData() == true); - - // TODO: Currently we are supporting uniform padding for the tensor, so only a single - // value is being read. (TOP = BOTTOM = LEFT = RIGHT). 
- // Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT) - - const auto &padding_data = _ctx.at(paddings_index).data(); - auto base = padding_data.base(); - auto padsize = reinterpret_cast<const int *>(base) + 3; - param.padding_size = *padsize; + param.padding_index = paddings_index.asInt(); auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto pad_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_index}); - auto fn = nnfw::make_unique<PadLayer>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_PAD"))) + { + // USE CPU VERSION OF PADLAYER + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimplePadLayer>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_size); - builder.append("Pad", std::move(fn)); + fn->configure(ifm_alloc, ofm_alloc, pad_alloc, getARMComputeAxises(rank)); + builder.append("PAD", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayerEx>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), CAST_CL(pad_alloc)); + + builder.append("PAD", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } }; _builder.addStage(stage); @@ -3506,6 +4280,21 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node) const ::internal::tflite::operand::Index input_index{node.param().input_index}; const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + const auto input_batch = _ctx.at(input_index).shape().dim(0); + const auto output_batch = _ctx.at(output_index).shape().dim(0); + const auto input_depth = _ctx.at(input_index).shape().dim(3); + const auto output_depth = 
_ctx.at(output_index).shape().dim(3); + const auto block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + const auto input_height = _ctx.at(input_index).shape().dim(1); + const auto input_width = _ctx.at(input_index).shape().dim(2); + + // All assertions as per NNAPI specification. + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert((block_size >= 1) && (input_height % block_size == 0) && (input_width % block_size == 0)); + assert(input_batch == output_batch); + assert(input_depth * block_size * block_size == output_depth); + // Set Shape Constraints and TensorInfo _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), @@ -3528,17 +4317,284 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node) param.output_index = output_index.asInt(); param.input_index = input_index.asInt(); - param.block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + param.block_size = block_size; auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); - auto rank = 4; - auto fn = nnfw::make_unique<SimpleSpaceToDepth>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_SPACETODEPTH"))) + { + // USE CPU VERSION OF SPACETODEPTH + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimpleSpaceToDepth>(); + + fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank)); - fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank)); - builder.append("SpaceToDepth", std::move(fn)); + builder.append("SpaceToDepth", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>(); + + fn->configure(CAST_CL(input_alloc), 
CAST_CL(output_alloc), param.block_size); + + builder.append("SpaceToDepth", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index}; + + { // New block for assertions + + // Currently, only 4D NHWC input/output op_context are supported. + // The 4D array need to have exactly 2 spatial dimensions. + // TODO: Support arbitrary dimension in SpaceToBatchND. + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).shape().rank() == 1); + assert(_ctx.at(padding_size_index).shape().rank() == 2); + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto &input_shape = _ctx.at(input_index).shape(); + const auto &block_size_shape = _ctx.at(block_size_index).shape(); + const auto &padding_size_shape = _ctx.at(padding_size_index).shape(); + + assert(output_shape.dim(3) == input_shape.dim(3)); + assert(block_size_shape.dim(0) == 2); + assert(padding_size_shape.dim(0) == 2); + assert(padding_size_shape.dim(1) == 2); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + 
_ctx.at(input_index).zeroPoint())); + + _builder.addShapeConstr(block_size_index, + asTensorInfo(asTensorShape(_ctx.at(block_size_index).shape()), + _ctx.at(block_size_index).type(), + _ctx.at(block_size_index).scale(), + _ctx.at(block_size_index).zeroPoint())); + + _builder.addShapeConstr(padding_size_index, + asTensorInfo(asTensorShape(_ctx.at(padding_size_index).shape()), + _ctx.at(padding_size_index).type(), + _ctx.at(padding_size_index).scale(), + _ctx.at(padding_size_index).zeroPoint())); + + if (_ctx.at(block_size_index).hasData()) + { + const auto rank = _ctx.at(input_index).shape().rank(); + const auto num_of_block_size = _ctx.at(block_size_index).shape().asVector(); + auto block_size_base = _ctx.at(block_size_index).data().base(); + auto block_size_type = _ctx.at(block_size_index).type(); + + switch (block_size_type) + { + case ANEURALNETWORKS_TENSOR_INT32: + { + auto initializer = [block_size_base, num_of_block_size, + rank](::arm_compute::ITensor &tensor) { + assert(num_of_block_size < 4); + for (size_t n = 0; n < num_of_block_size; ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(block_size_base) + n; + int32_t *into = reinterpret_cast<int32_t *>( + tensor.ptr_to_element({ToARMComputeAxis(rank, n + 1).value()})); + *into = *from; + } + }; + _builder.addInitializer(block_size_index, initializer); + + break; + } + default: + { + throw std::runtime_error("Not supported"); + } + } + } + + if (_ctx.at(padding_size_index).hasData()) + { + const auto padding_size_shape = _ctx.at(padding_size_index).shape(); + const auto rank = _ctx.at(input_index).shape().rank(); + auto padding_size_base = _ctx.at(padding_size_index).data().base(); + auto padding_size_type = _ctx.at(padding_size_index).type(); + + switch (padding_size_type) + { + case ANEURALNETWORKS_TENSOR_INT32: + { + auto initializer = [padding_size_base, padding_size_shape, + rank](::arm_compute::ITensor &tensor) { + assert(padding_size_shape.dim(1) == 2); + 
assert(padding_size_shape.dim(0) < 4); + for (size_t n = 0; n < padding_size_shape.dim(0); ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(padding_size_base) + + (n * padding_size_shape.dim(1)); + int32_t *into = reinterpret_cast<int32_t *>( + tensor.ptr_to_element({0, ToARMComputeAxis(rank, n + 1).value()})); + into[0] = from[0]; + into[1] = from[1]; + } + }; + _builder.addInitializer(padding_size_index, initializer); + break; + } + default: + { + throw std::runtime_error("Not supported"); + } + } + } + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + int block_size_index; + int padding_size_index; + int32_t rank; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + param.block_size_index = block_size_index.asInt(); + param.padding_size_index = padding_size_index.asInt(); + param.rank = _ctx.at(input_index).shape().rank(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index}); + auto padding_size_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_size_index}); + + // NOTE SimpleSpaceToBatchND is quite slow + if (from_env<bool>(std::getenv("USE_SIMPLE_SPACE_TO_BATCH_ND"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleSpaceToBatchND>(); + + fn->configure(input_alloc, block_size_alloc, padding_size_alloc, output_alloc); + builder.append("SpaceToBatchND", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc), + CAST_CL(output_alloc)); + 
builder.append("SpaceToBatchND", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).hasData() == true); + + const int32_t *block_size = + reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base()); + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto &input_shape = _ctx.at(input_index).shape(); + + assert((_ctx.at(block_size_index).data().size() / sizeof(int32_t)) == 2 && block_size[0] > 0 && + block_size[1] > 0); + { + assert(output_shape.dim(3) == input_shape.dim(3)); + assert(output_shape.dim(1) == input_shape.dim(1) * block_size[0]); + assert(output_shape.dim(2) == input_shape.dim(2) * block_size[1]); + assert(output_shape.dim(0) == input_shape.dim(0) / (block_size[0] * block_size[1])); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + output_index, asTensorInfo(asTensorShape(output_shape, false), _ctx.at(output_index).type(), + _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr( + input_index, asTensorInfo(asTensorShape(input_shape, false), _ctx.at(input_index).type(), + _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + const int32_t *block_size; + int32_t rank; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = 
input_index.asInt(); + param.block_size = block_size; + param.rank = _ctx.at(input_index).shape().rank(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + // NOTE SimpleBatchToSpaceND is quite slow, but may be useful for debugging + if (from_env<bool>(std::getenv("USE_SIMPLE_BATCH_TO_SPACE_ND"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleBatchToSpaceND>(); + + fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(param.rank)); + builder.append("BatchToSpaceND", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceND>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size); + builder.append("BatchToSpaceND", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } }; @@ -3550,9 +4606,6 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape(); - const auto ifm_shape = _ctx.at(ifm_index).shape(); - // Set Shape Constraints and TensorInfo _builder.addShapeConstr( ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), @@ -3583,10 +4636,10 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - param.radius = 2 * ifm_shape.dim(3) + 1; // normSize = depth * 2 + 1 - param.alpha = 1.0f; // In the implementation to make alpha_ become 1 - param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) - 
param.bias = 0.0f; // Don't offset the reduction. + param.radius = 2 * _ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1 + param.alpha = 1.0f; // In the implementation to make alpha_ become 1 + param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) + param.bias = 0.0f; // Don't offset the reduction. auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); @@ -3598,7 +4651,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info); @@ -3606,9 +4659,9 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NENormalizationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>(); - fn->configure(CAST_NE(ifm_alloc), CAST_NE(ofm_alloc), norm_info); + fn->configure(ifm_alloc, ofm_alloc, norm_info); builder.append("L2Normalize", std::move(fn)); } @@ -3647,10 +4700,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || (ANEURALNETWORKS_PADDING_VALID == padding_type)); - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, 
asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -3678,7 +4733,7 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -3731,9 +4786,6 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>(); @@ -3745,10 +4797,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>(); const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>(); - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -3876,11 +4930,27 @@ void 
Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index}); auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index}); - auto fn = nnfw::make_unique<SimpleEmbeddingLookup>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_EMBEDDINGLOOKUP"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleEmbeddingLookup>(); - fn->configure(lookups_alloc, values_alloc, output_alloc); + fn->configure(lookups_alloc, values_alloc, output_alloc); - builder.append("EmbeddingLookup", std::move(fn)); + builder.append("EmbeddingLookup", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>(); + + fn->configure(CAST_CL(values_alloc), CAST_CL(output_alloc), CAST_CL(lookups_alloc)); + + builder.append("EmbeddingLookup", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } }; _builder.addStage(stage); @@ -3888,8 +4958,857 @@ void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node) { - // TODO Implement HashtableLookup - throw std::runtime_error("Not supported"); + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index hits_index{node.param().hits_index}; + const ::internal::tflite::operand::Index lookups_index{node.param().lookups_index}; + const ::internal::tflite::operand::Index values_index{node.param().values_index}; + const ::internal::tflite::operand::Index keys_index{node.param().keys_index}; + + const auto &lookups_obj = _ctx.at(lookups_index); + const auto &keys_obj = _ctx.at(keys_index); + const auto &hits_obj = _ctx.at(hits_index); + const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = _ctx.at(output_index); + + 
assert(lookups_obj.type() == ANEURALNETWORKS_TENSOR_INT32); + assert(keys_obj.type() == ANEURALNETWORKS_TENSOR_INT32); + assert(hits_obj.type() == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM); + + const auto &lookups_shape = lookups_obj.shape(); + const auto &keys_shape = keys_obj.shape(); + const auto &hits_shape = hits_obj.shape(); + const auto &values_shape = values_obj.shape(); + const auto &output_shape = output_obj.shape(); + + assert(values_shape.rank() == output_shape.rank()); + + assert(lookups_shape.rank() == 1); + assert(keys_shape.rank() == 1); + assert(values_shape.dim(0) == keys_shape.dim(0)); + assert(lookups_shape.dim(0) == output_shape.dim(0)); + + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(hits_index, + asTensorInfo(asTensorShape(_ctx.at(hits_index).shape()), + _ctx.at(hits_index).type(), _ctx.at(hits_index).scale(), + _ctx.at(hits_index).zeroPoint())); + + _builder.addShapeConstr(lookups_index, asTensorInfo(asTensorShape(_ctx.at(lookups_index).shape()), + _ctx.at(lookups_index).type(), + _ctx.at(lookups_index).scale(), + _ctx.at(lookups_index).zeroPoint())); + _builder.addShapeConstr(values_index, + asTensorInfo(asTensorShape(_ctx.at(values_index).shape()), + _ctx.at(values_index).type(), _ctx.at(values_index).scale(), + _ctx.at(values_index).zeroPoint())); + _builder.addShapeConstr(keys_index, + asTensorInfo(asTensorShape(_ctx.at(keys_index).shape()), + _ctx.at(keys_index).type(), _ctx.at(keys_index).scale(), + _ctx.at(keys_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int32_t output_index; + int32_t hits_index; + int32_t lookups_index; + int32_t values_index; + int32_t keys_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.hits_index = hits_index.asInt(); + param.lookups_index = lookups_index.asInt(); + 
param.values_index = values_index.asInt(); + param.keys_index = keys_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto hits_alloc = ctx.at(::internal::tflite::operand::Index{param.hits_index}); + auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index}); + auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index}); + auto keys_alloc = ctx.at(::internal::tflite::operand::Index{param.keys_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_HASHTABLELOOKUP"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleHashtableLookupLayer>(); + + fn->configure(lookups_alloc, keys_alloc, values_alloc, output_alloc, hits_alloc); + + builder.append("HashtableLookup", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>(); + + fn->configure(CAST_CL(lookups_alloc), CAST_CL(keys_alloc), CAST_CL(values_alloc), + CAST_CL(output_alloc), CAST_CL(hits_alloc)); + + builder.append("HashtableLookup", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index radius_index{node.param().radius_index}; + const ::internal::tflite::operand::Index bias_index{node.param().bias_index}; + const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index}; + const ::internal::tflite::operand::Index beta_index{node.param().beta_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + 
ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + int32_t radius; + float bias; + float alpha; + float beta; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + param.radius = _ctx.at(radius_index).asScalar<int32_t>(); + param.alpha = _ctx.at(alpha_index).asScalar<float>(); + param.beta = _ctx.at(beta_index).asScalar<float>(); + param.bias = _ctx.at(bias_index).asScalar<float>(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + const auto norm_info = + ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, param.radius, + param.alpha, param.beta, param.bias, false); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info); + + builder.append("LocalResponseNormalization", std::move(fn)); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>(); + + fn->configure(ifm_alloc, ofm_alloc, norm_info); + + builder.append("LocalResponseNormalization", std::move(fn)); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + 
const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + + int32_t block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + assert(block_size > 0); + + { // assertions block + const auto output_shape = _ctx.at(output_index).shape(); + const auto input_shape = _ctx.at(input_index).shape(); + assert(output_shape.dim(0) == input_shape.dim(0)); + assert(output_shape.dim(1) == input_shape.dim(1) * block_size); + assert(output_shape.dim(2) == input_shape.dim(2) * block_size); + assert(input_shape.dim(3) % (block_size * block_size) == 0); + assert(output_shape.dim(3) == input_shape.dim(3) / (block_size * block_size)); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + int32_t block_size; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + param.block_size = block_size; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_DEPTHTOSPACE"))) + { + // USE CPU VERSION OF DEPTHTOSPACE + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimpleDepthToSpace>(); + + fn->configure(input_alloc, output_alloc, param.block_size, 
getARMComputeAxises(rank)); + + builder.append("DepthToSpace", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size); + + builder.append("DepthToSpace", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Unpack::Node &node) +{ + VERBOSE(Unpack) << "Configure Unpack operation" << std::endl; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + uint32_t input_rank = _ctx.at(ifm_index).shape().rank(); + + assert(input_rank == 4 || input_rank == 3 || input_rank == 2); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), + _ctx.at(ifm_index).type())); + + int32_t axis = + _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>(); + // int32_t num_split = + // _ctx.at(::internal::tflite::operand::Index{node.param().num_split_index}).asScalar<int32_t>(); + + for (const auto &index : node.param().ofm_indexes) + { + const ::internal::tflite::operand::Index ofm_index{index}; + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), + _ctx.at(ofm_index).type())); + } + + struct Param + { + std::vector<int32_t> ofm_indexes; + int ifm_index; + int axis; + }; + + if (input_rank == 4) + { + Param param; + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + for (const auto &index : node.param().ofm_indexes) + { + param.ofm_indexes.push_back(index); + } + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = 
nnfw::cpp14::make_unique<SimpleUnpackLayer>(); + std::vector<::arm_compute::ICLTensor *> outputs; + for (const auto &index : param.ofm_indexes) + { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{index}); + outputs.push_back(CAST_CL(output_alloc)); + } + fn->configure(CAST_CL(input_alloc), outputs, param.axis); + + builder.append("Unpack", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); + } + else if (input_rank == 3) + { + // TODO: generate test case for this and generalize 4D method all cases. + throw std::runtime_error("UNPACK_3D not implemented"); + } + else if (input_rank == 2) + { + throw std::runtime_error("UNPACK_2D not implemented"); + } + else + { + throw std::runtime_error("UNPACK axis is not valid"); + } +} + +void Planner::visit(const ::internal::tflite::op::Pack::Node &node) +{ + VERBOSE(Pack) << "Configure Pack operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const uint32_t output_rank = _ctx.at(ofm_index).shape().rank(); + const uint32_t input_rank = output_rank - 1; + + assert(output_rank == 4 || output_rank == 3 || output_rank == 2); + + for (const auto &index : node.param().ifm_indexes) + { + const ::internal::tflite::operand::Index ifm_index{index}; + assert(_ctx.at(ifm_index).shape().rank() == input_rank); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), + _ctx.at(ifm_index).type())); + } + + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), + _ctx.at(ofm_index).type())); + + int32_t axis = + _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>(); + + struct Param + { + std::vector<int32_t> ifm_indexes; + int ofm_index; + int axis; + }; + + if (input_rank == 3) + { + Param param; + param.ofm_index = ofm_index.asInt(); + param.axis = axis; + + // TODO: Fix this once all permutations are 
present. + if (param.axis != 0) + { + throw std::runtime_error("This axis not supported, some 4D permutations are missing"); + } + + for (const auto &index : node.param().ifm_indexes) + { + param.ifm_indexes.push_back(index); + } + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<SimplePackLayer>(); + std::vector<::arm_compute::ICLTensor *> inputs; + for (const auto &index : param.ifm_indexes) + { + auto input_alloc = ctx.at(::internal::tflite::operand::Index{index}); + inputs.push_back(CAST_CL(input_alloc)); + } + fn->configure(inputs, CAST_CL(output_alloc), param.axis); + + builder.append("Pack", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); + } + else if (input_rank == 2) + { + // TODO: generate test case for this and generalize 4D method all cases. 
+ throw std::runtime_error("PACK_2D not implemented"); + } + else if (input_rank == 1) + { + throw std::runtime_error("PACK_1D not implemented"); + } + else + { + throw std::runtime_error("PACK axis is not valid"); + } +} + +void Planner::visit(const ::internal::tflite::op::Neg::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + // NOTE SimpleNeg is quite slow, but may be useful for debugging + if (from_env<bool>(std::getenv("USE_SIMPLE_NEG"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleNeg>(); + + fn->configure(ifm_alloc, ofm_alloc); + builder.append("Neg", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + builder.append("Neg", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Exp::Node &node) +{ + 
VERBOSE(Exp) << "Configure Exp operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + struct Param + { + int ofm_index; + int ifm_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExp>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + + builder.append("Exp", std::move(fn)); + } + else + { + throw std::runtime_error("Not supported"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) +{ + VERBOSE(ReduceSum) << "Configure ReduceSum operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); + const auto axis_shape = _ctx.at(axis_index).shape(); + + assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); + 
assert(_ctx.at(axis_index).hasData()); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2)) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + uint32_t input_rank = ifm_shape.rank(); + std::set<uint32_t> axis; + int32_t axis_rank = axis_shape.rank(); + + if (axis_rank == 0) + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); + } + else if (axis_rank == 1) + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have to + // find a way to infer output shape when sinking output. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); + } + } + else + { + throw std::runtime_error("Not supported axis"); + } + + struct Param + { + int ofm_index; + int ifm_index; + std::set<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::SUM); + + builder.append("ReduceSum", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Abs::Node &node) +{ + // TODO Implement Abs op + throw std::runtime_error("Not supported yet"); +} + +void Planner::visit(const ::internal::tflite::op::NotEqual::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const 
auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::ComparisonOperation::NOT_EQUAL); + + builder.append("NotEqual", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LogicalAnd::Node &node) +{ + VERBOSE(Logical_AND) << "Configure Logical_AND operation" << std::endl; + + const 
::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto 
fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::BinaryLogicalOperation::AND); + + builder.append("LogicalAnd", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LogicalNot::Node &node) +{ + // TODO Implement LogicalNot op + throw std::runtime_error("Not supported yet"); +} + +void Planner::visit(const ::internal::tflite::op::LogicalOr::Node &node) +{ + VERBOSE(LogicalOr) << "Configure LogicalOr operation" << std::endl; + + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + 
_ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::BinaryLogicalOperation::OR); + + builder.append("LogicalOr", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); } class AllocationContext final : public IAllocationContext @@ -3939,32 +5858,69 @@ private: ::internal::arm_compute::Plan &_plan; }; +/** + * @brief Class to provide methods of compilation plan builder + */ class PlanBuilder final : public IPlanBuilder { public: + /** + * @brief Construct a new PlanBuilder object with Plan + * @param [in] plan The Plan object + */ PlanBuilder(::internal::arm_compute::Plan &plan) : _plan{plan} { // DO NOTHING } public: + /** + * @brief Add TensorInfo with Shape Constraints + * @param [in] ind Index of operand + * @param [in] info TensorInfo value to set to index of operand + * @return N/A + */ void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) override; public: + /** + * @brief Add Subsumption constraints + * @param [in] ind Index of operand + * @param [in] base Index of base operand of Subsumption + * @param 
[in] offset Offset of Subsumption + * @param [in] shape Shape of Subsumption + * @param [in] extend_parent extend_parent value of Subsumption + * @return N/A + */ void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, const ::internal::tflite::operand::Index &base, const ::arm_compute::Coordinates &offset, const ::arm_compute::TensorShape &shape, bool extend_parent) override; public: + /** + * @brief Add Initializer lambda with ITensor param + * @param [in] ind Index of operand + * @param [in] initializer Initializer to add + * @return N/A + */ void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) override; public: + /** + * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params + * @param [in] stage Stage to add + * @return N/A + */ void addStage(const Stage &stage) override; public: + /** + * @brief Finilize(build) the Plan + * @return N/A + */ void finalize(void) const; private: @@ -4197,6 +6153,8 @@ void PlanBuilder::finalize(void) const auto type = operands.at(operand_idx).type(); auto shape = operands.at(operand_idx).shape(); + // Need to support scalar types (ANEURALNETWORKS_FLOAT32 and ANEURALNETWORKS_INT32) + // for rank > 1 tensor, because it can be operand of broadcast operation switch (rank) { case 0: // scalar @@ -4240,12 +6198,14 @@ void PlanBuilder::finalize(void) const auto size = shape.asVector(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initVectorTensor<float>, _1, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initVectorTensor<int32_t>, _1, base, size); @@ -4270,12 +6230,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto 
initializer = std::bind(initMatrixTensor<float>, _1, matrix_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initMatrixTensor<int32_t>, _1, matrix_shape, base, size); @@ -4300,12 +6262,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initTensor3D<float>, _1, tensor_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initTensor3D<int32_t>, _1, tensor_shape, base, size); @@ -4330,12 +6294,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initFeatureTensor<float>, _1, feature_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = @@ -4417,8 +6383,10 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation) if (::internal::arm_compute::isGpuMode()) { arm_compute::CLScheduler::get().default_init(); - arm_compute::CLKernelLibraryEx::get().init("./cl_kernels/", cl::Context::getDefault(), - cl::Device::getDefault()); + // NOTE CLKernelLibraryEx must use the same context as CLScheduler + // It did not check whether another device is available. 
+ arm_compute::CLKernelLibraryEx::get().init( + "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault()); } const auto &operands = compilation->plan().model().operands(); diff --git a/runtimes/pure_arm_compute/src/compilation.h b/runtimes/pure_arm_compute/src/compilation.h index dd3613b2d..1a06d06b9 100644 --- a/runtimes/pure_arm_compute/src/compilation.h +++ b/runtimes/pure_arm_compute/src/compilation.h @@ -14,15 +14,28 @@ * limitations under the License. */ +/** + * @file compilation.h + * @brief This file defines ANeuralNetworksCompilation class for handling Compilation NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __COMPILATION_H__ #define __COMPILATION_H__ #include "internal/Model.h" #include "internal/arm_compute.h" +/** + * @brief struct to define Compilation of NNAPI + */ struct ANeuralNetworksCompilation { public: + /** + * @brief Construct with params + * @param [in] model Pointer of internal::tflite::Model to set internal::arm_compute::Plan + */ ANeuralNetworksCompilation(const std::shared_ptr<const internal::tflite::Model> &model) : _plan{new internal::arm_compute::Plan{model}} { @@ -30,11 +43,28 @@ public: } public: + /** + * @brief Get reference of internal::arm_compute::Plan + * @return Reference of internal::arm_compute::Plan + */ internal::arm_compute::Plan &plan(void) { return *_plan; } public: + /** + * @brief Publish internal Plan to param + * @param [out] plan Pointer of internal::arm_compute::Plan to be set + * @return N/A + */ void publish(std::shared_ptr<const internal::arm_compute::Plan> &plan) { plan = _plan; } + /** + * @brief Get @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false + * @return @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false + */ bool isFinished(void) { return _isFinished; } + /** + * @brief Mark compilation process finished + * @return N/A + */ void markAsFinished() { _isFinished = true; } private: diff --git 
a/runtimes/pure_arm_compute/src/event.h b/runtimes/pure_arm_compute/src/event.h index 5d41dca84..b5595583c 100644 --- a/runtimes/pure_arm_compute/src/event.h +++ b/runtimes/pure_arm_compute/src/event.h @@ -14,9 +14,18 @@ * limitations under the License. */ +/** + * @file event.h + * @brief This file defines ANeuralNetworksEvent struct for handling Event NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __EVENT_H__ #define __EVENT_H__ +/** + * @brief struct to define Event of NNAPI + */ struct ANeuralNetworksEvent { }; diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc index 778a22155..b7eba1cef 100644 --- a/runtimes/pure_arm_compute/src/execution.cc +++ b/runtimes/pure_arm_compute/src/execution.cc @@ -18,8 +18,8 @@ #include "compilation.h" #include "execution.h" -#include "util/profiling/profiling.h" -#include "util/profiling/profiler.h" +#include "profiling/profiling.h" +#include "profiling/profiler.h" #include "event.h" #include "internal/VectorSource.h" @@ -34,7 +34,7 @@ #include "internal/Tensor3DSink.h" #include "internal/FeatureSink.h" -#include "util/feature/IndexIterator.h" +#include "misc/feature/IndexIterator.h" #include <arm_compute/runtime/CL/CLScheduler.h> @@ -70,7 +70,7 @@ static void asVectorSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::matrix::Shape &shape, const void *buffer, + const nnfw::misc::matrix::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -100,7 +100,7 @@ static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, const void *buffer, + const nnfw::misc::tensor::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -130,7 +130,7 @@ static void 
asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, } static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, const void *buffer, + const nnfw::misc::tensor::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -160,7 +160,7 @@ static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asFeatureSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::feature::Shape &shape, const void *buffer, + const nnfw::misc::feature::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -244,7 +244,7 @@ static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int3 } static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::feature::Shape &shape, void *buffer, size_t length) + const nnfw::misc::feature::Shape &shape, void *buffer, size_t length) { switch (type) { @@ -272,7 +272,7 @@ static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int } static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, void *buffer, size_t length) + const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length) { assert(shape.rank() == 3); @@ -302,7 +302,7 @@ static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, in } static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, void *buffer, size_t length) + const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length) { switch (type) { @@ -420,9 +420,9 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32 // squeeze(shape) eliminates all the dimensions whose dimensionality is 1 // For example, squeeze([3, 1, 3]) returns [3, 3] -static 
nnfw::util::tensor::Shape squeeze(const nnfw::util::tensor::Shape &shape) +static nnfw::misc::tensor::Shape squeeze(const nnfw::misc::tensor::Shape &shape) { - nnfw::util::tensor::Shape res(0); + nnfw::misc::tensor::Shape res(0); for (uint32_t axis = 0; axis < shape.rank(); ++axis) { @@ -497,7 +497,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution, return ANEURALNETWORKS_UNEXPECTED_NULL; } - const bool sync = profiling::Context::get().sync().enabled(); + const bool sync = profiling::Context::get().sync(); const auto &plan = execution->plan(); const auto &model = plan.model(); diff --git a/runtimes/pure_arm_compute/src/execution.h b/runtimes/pure_arm_compute/src/execution.h index c036fe2c8..f55ab3fbf 100644 --- a/runtimes/pure_arm_compute/src/execution.h +++ b/runtimes/pure_arm_compute/src/execution.h @@ -14,6 +14,13 @@ * limitations under the License. */ +/** + * @file execution.h + * @brief This file contains ANeuralNetworksExecution class for handling Execution NNAPI such as + * ANeuralNetworksExecution_create, ANeuralNetworksExecution_setInput + * @ingroup COM_AI_RUNTIME + */ + #ifndef __EXECUTION_H__ #define __EXECUTION_H__ @@ -21,9 +28,16 @@ #include "internal/Sink.h" #include "internal/Source.h" +/** + * @brief struct to express Execution of NNAPI + */ struct ANeuralNetworksExecution { public: + /** + * @brief Construct with params + * @param [in] plan Pointer to get internal::arm_compute::Plan + */ ANeuralNetworksExecution(const std::shared_ptr<const internal::arm_compute::Plan> &plan) : _plan{plan} { @@ -32,31 +46,69 @@ public: } public: + /** + * @brief Get reference of internal::arm_compute::Plan + * @return Const reference of internal::arm_compute::Plan + */ const internal::arm_compute::Plan &plan(void) const { return *_plan; } private: std::shared_ptr<const internal::arm_compute::Plan> _plan; public: + /** + * @brief Set the nth source with param + * @param [in] n Index of the nth source + * @param [in] source Pointer to 
set the nth source from + * @return N/A + */ // TODO Use InputIndex instead of int void source(int n, std::unique_ptr<Source> &&source) { _sources.at(n) = std::move(source); } + /** + * @brief Set the nth source with param + * @param [in] n Index of the nth source + * @param [in] args Arguments to set the nth source from + * @return N/A + */ template <typename T, typename... Args> void source(int n, Args &&... args) { source(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}}); } public: + /** + * @brief Get the nth source + * @param [in] n Index of the nth source + * @return Const reference of Source + */ const Source &source(int n) const { return *(_sources.at(n)); } public: + /** + * @brief Set the nth sink with param + * @param [in] n Index of the nth sink + * @param [in] sink Pointer to set the nth sink from + * @return N/A + */ // TODO Use OutputIndex instead of int void sink(int n, std::unique_ptr<Sink> &&sink) { _sinks.at(n) = std::move(sink); } + /** + * @brief Set the nth sink with param + * @param [in] n Index of the nth sink + * @param [in] args Arguments to set the nth sink from + * @return N/A + */ template <typename T, typename... Args> void sink(int n, Args &&... args) { sink(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}}); } public: + /** + * @brief Get the nth sink + * @param [in] n Index of the nth sink + * @return Const reference of Sink + */ const Sink &sink(int n) const { return *(_sinks.at(n)); } private: diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSink.h b/runtimes/pure_arm_compute/src/internal/FeatureSink.h index 9e4412c2a..7c6884141 100644 --- a/runtimes/pure_arm_compute/src/internal/FeatureSink.h +++ b/runtimes/pure_arm_compute/src/internal/FeatureSink.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       FeatureSink.h + * @brief      This file contains FeatureSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_FEATURE_SINK_H__ #define __INTERNAL_FEATURE_SINK_H__ @@ -21,22 +27,36 @@ #include "internal/nnapi/feature/View.h" #include "internal/arm_compute/feature/View.h" -#include <util/feature/Shape.h> -#include "util/feature/IndexIterator.h" +#include <misc/feature/Shape.h> +#include "misc/feature/IndexIterator.h" -// -// FeatureSink -// +/** + * @brief Class to store Feature(4D) output data. + * This is for pulling data to internal tensor from other tensor. + * @tparam T Type of the data elements + */ template <typename T> class FeatureSink final : public Sink { public: - FeatureSink(const nnfw::util::feature::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a FeatureSink object + * + * @param[in] shape 4D tensor dimensions for this feature + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + FeatureSink(const nnfw::misc::feature::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { const ::internal::arm_compute::feature::View<T> from{&tensor}; @@ -44,7 +64,7 @@ public: // Inevitably casting must be done. 
::internal::nnapi::feature::View<T> into{_shape, _base, _size}; - ::nnfw::util::feature::iterate(_shape) + ::nnfw::misc::feature::iterate(_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -52,7 +72,7 @@ public: } private: - const nnfw::util::feature::Shape _shape; + const nnfw::misc::feature::Shape _shape; T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSource.h b/runtimes/pure_arm_compute/src/internal/FeatureSource.h index fca56e341..772beb701 100644 --- a/runtimes/pure_arm_compute/src/internal/FeatureSource.h +++ b/runtimes/pure_arm_compute/src/internal/FeatureSource.h @@ -14,31 +14,54 @@ * limitations under the License. */ +/** + * @file       FeatureSource.h + * @brief      This file contains FeatureSource class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_FEATURE_SOURCE_H__ #define __INTERNAL_FEATURE_SOURCE_H__ -#include <util/feature/Shape.h> -#include <util/feature/IndexIterator.h> +#include <misc/feature/Shape.h> +#include <misc/feature/IndexIterator.h> #include "internal/nnapi/feature/Reader.h" #include "internal/arm_compute/feature/View.h" +/** + * @brief Class to store feature(4D) input data. + * This is for push out the data to another tensor. 
+ * @tparam T Type of the data elements + */ template <typename T> class FeatureSource final : public Source { public: - FeatureSource(const nnfw::util::feature::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a FeatureSource object + * + * @param[in] shape 4D tensor dimensions for this feature + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + FeatureSource(const nnfw::misc::feature::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Push the data out to the another tensor + * @param[out] The tensor that output data will be stored + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { const ::internal::nnapi::feature::Reader<T> from{_shape, _base, _size}; ::internal::arm_compute::feature::View<T> into{&tensor}; - ::nnfw::util::feature::iterate(_shape) + ::nnfw::misc::feature::iterate(_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -46,7 +69,7 @@ public: } private: - const nnfw::util::feature::Shape _shape; + const nnfw::misc::feature::Shape _shape; const T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h index aa1e67177..2a6e2a743 100644 --- a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h +++ b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file IExecutionBuilder.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines interface of ExecutionBuilder + */ #ifndef __INTERNAL_IEXECUTION_BUILDER_H__ #define __INTERNAL_IEXECUTION_BUILDER_H__ @@ -22,10 +27,22 @@ #include <memory> #include <string> +/** + * @brief Struct to define interface of ExecutionBuilder + */ struct IExecutionBuilder { + /** + * @brief Destroy the IExecutionBuilder object + */ virtual ~IExecutionBuilder() = default; + /** + * @brief Append function to execute + * @param[in] name Name of function + * @param[in] f Function to append + * @return N/A + */ virtual void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSink.h b/runtimes/pure_arm_compute/src/internal/MatrixSink.h index 32bd49dc6..23ecc112b 100644 --- a/runtimes/pure_arm_compute/src/internal/MatrixSink.h +++ b/runtimes/pure_arm_compute/src/internal/MatrixSink.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file MatrixSink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines MatrixSink class + */ #ifndef __INTERNAL_MATRIX_SINK_H__ #define __INTERNAL_MATRIX_SINK_H__ @@ -27,9 +32,19 @@ #include <cstring> #include <cassert> +/** + * @brief Class to get matrix data from arm compute tensor + */ template <typename T> class MatrixSink final : public Sink { public: + /** + * @brief Construct a new Matrix Sink object + * @param[in] H Height of matrix + * @param[in] W Width of matrix + * @param[in] base Pointer to get data + * @param[in] size Size of matrix + */ MatrixSink(const int32_t H, const int32_t W, T *base, const size_t size) : _height{H}, _width{W}, _base{base} { @@ -37,6 +52,11 @@ public: } public: + /** + * @brief Get matrix data from arm compute tensor to base + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { assert(tensor.info()->dimension(0) == _width); diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSource.h b/runtimes/pure_arm_compute/src/internal/MatrixSource.h index 2f5d92484..71d6a804f 100644 --- a/runtimes/pure_arm_compute/src/internal/MatrixSource.h +++ b/runtimes/pure_arm_compute/src/internal/MatrixSource.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file MatrixSource.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines MatrixSource class + */ #ifndef __INTERNAL_MATRIX_SOURCE_H__ #define __INTERNAL_MATRIX_SOURCE_H__ @@ -23,16 +28,30 @@ #include "internal/Source.h" +/** + * @brief Class to push matrix data to arm compute tensor + */ template <typename T> class MatrixSource final : public Source { public: - MatrixSource(const nnfw::util::matrix::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new MatrixSource object + * @param[in] shape Shape of matrix + * @param[in] base Pointer of matrix data to push + * @param[in] size Size of matrix + */ + MatrixSource(const nnfw::misc::matrix::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // do nothing } public: + /** + * @brief Push matrix data to arm compute tensor + * @param[out] tensor Tensor object of arm compute to push matrix data + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -55,7 +74,7 @@ public: } private: - const nnfw::util::matrix::Shape _shape; + const nnfw::misc::matrix::Shape _shape; const T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/Model.cc b/runtimes/pure_arm_compute/src/internal/Model.cc index 3a31f9911..03753fea2 100644 --- a/runtimes/pure_arm_compute/src/internal/Model.cc +++ b/runtimes/pure_arm_compute/src/internal/Model.cc @@ -16,8 +16,6 @@ #include "internal/Model.h" -#include <cassert> - namespace internal { namespace tflite @@ -25,7 +23,7 @@ namespace tflite namespace operand { -Shape::Shape(uint32_t rank) : nnfw::util::tensor::Shape(rank) +Shape::Shape(uint32_t rank) : nnfw::misc::tensor::Shape(rank) { // DO NOTHING } @@ -37,17 +35,17 @@ int32_t Shape::asVector(void) const return dim(0); } -nnfw::util::matrix::Shape Shape::asMatrix(void) const +nnfw::misc::matrix::Shape Shape::asMatrix(void) const { assert(rank() == 2); const auto height = 
dim(0); const auto width = dim(1); - return nnfw::util::matrix::Shape(height, width); + return nnfw::misc::matrix::Shape(height, width); } -nnfw::util::feature::Shape Shape::asFeature(void) const +nnfw::misc::feature::Shape Shape::asFeature(void) const { assert(rank() == 4); @@ -62,15 +60,15 @@ nnfw::util::feature::Shape Shape::asFeature(void) const const auto height = dim(1); const auto width = dim(2); - return nnfw::util::feature::Shape(batch, depth, height, width); + return nnfw::misc::feature::Shape(batch, depth, height, width); } -nnfw::util::tensor::Shape Shape::asTensor(void) const +nnfw::misc::tensor::Shape Shape::asTensor(void) const { - return nnfw::util::tensor::Shape(*this); // this shape represents shape of NNAPI + return nnfw::misc::tensor::Shape(*this); // this shape represents shape of NNAPI } -nnfw::util::kernel::Shape Shape::asKernel(void) const +nnfw::misc::kernel::Shape Shape::asKernel(void) const { assert(rank() == 4); @@ -84,7 +82,7 @@ nnfw::util::kernel::Shape Shape::asKernel(void) const const auto height = dim(1); const auto width = dim(2); - return nnfw::util::kernel::Shape(count, depth, height, width); + return nnfw::misc::kernel::Shape(count, depth, height, width); } // Extended dimension is filled with 1. @@ -120,6 +118,11 @@ const Object &Set::at(const Index &index) const { return *(_objects.at(index.asI Object &Set::at(const Index &index) { return *(_objects.at(index.asInt())); } +bool Set::exist(const Index &index) const +{ + return index.asInt() >= 0 && index.asInt() < _objects.size(); +} + } // namespace operand } // namespace tflite } // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/Model.h b/runtimes/pure_arm_compute/src/internal/Model.h index 33ba3a8fd..bdcf32f6f 100644 --- a/runtimes/pure_arm_compute/src/internal/Model.h +++ b/runtimes/pure_arm_compute/src/internal/Model.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Model.h + * @brief This file contains classes for handle internal Model object + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_MODEL_H__ #define __INTERNAL_MODEL_H__ @@ -24,15 +30,26 @@ namespace tflite namespace operand { +/** + * @brief Class to express index of operand. + */ class Index { public: + /** + * @brief Construct a new Index object for operand with param. + * @param [in] value The number of index + */ explicit Index(int value) : _value{value} { // DO NOTHING } public: + /** + * @brief Get index value as int + * @return Index value as int + */ int asInt(void) const { return _value; } private: @@ -46,10 +63,10 @@ private: #include <vector> #include <cstdint> -#include "util/feature/Shape.h" -#include "util/matrix/Shape.h" -#include "util/kernel/Shape.h" -#include "util/tensor/Shape.h" +#include "misc/feature/Shape.h" +#include "misc/matrix/Shape.h" +#include "misc/kernel/Shape.h" +#include "misc/tensor/Shape.h" namespace internal { @@ -58,19 +75,51 @@ namespace tflite namespace operand { -struct Shape : public nnfw::util::tensor::Shape +/** + * @brief Class to express shape of operand. + */ +struct Shape : public nnfw::misc::tensor::Shape { public: + /** + * @brief Construct a new Shape object for operand with param. 
+ * @param [in] rank The rank value of shape + */ Shape(uint32_t rank); public: + /** + * @brief Get dimension value of tensor as vector + * @return Dimension value(int32_t) of tensor as vector + */ int32_t asVector(void) const; - nnfw::util::feature::Shape asFeature(void) const; - nnfw::util::matrix::Shape asMatrix(void) const; - nnfw::util::kernel::Shape asKernel(void) const; - nnfw::util::tensor::Shape asTensor(void) const; + /** + * @brief Get dimension values of tensor as feature::Shape + * @return Dimension values of tensor as feature::Shape + */ + nnfw::misc::feature::Shape asFeature(void) const; + /** + * @brief Get dimension values of tensor as matrix::Shape + * @return Dimension values of tensor as matrix::Shape + */ + nnfw::misc::matrix::Shape asMatrix(void) const; + /** + * @brief Get dimension values of tensor as kernel::Shape + * @return Dimension values of tensor as kernel::Shape + */ + nnfw::misc::kernel::Shape asKernel(void) const; + /** + * @brief Get dimension values of tensor::Shape + * @return Dimension values of tensor::Shape + */ + nnfw::misc::tensor::Shape asTensor(void) const; public: + /** + * @brief Extend rank of Shape object for operand with param. + * @param [in] to_rank The rank value to be extended to + * @return N/A + */ void extendRank(size_t); }; @@ -87,27 +136,60 @@ namespace tflite namespace operand { +/** + * @brief Class to have data of operand. + */ struct Data { + /** + * @brief Destruct this object + */ virtual ~Data() = default; + /** + * @brief Get size of data + * @return size of data + */ virtual size_t size(void) const = 0; + /** + * @brief Get the base address of data + * @return the base address of data + */ virtual const uint8_t *base(void) const = 0; }; +/** + * @brief Class to have cached data of operand. + */ class CachedData final : public Data { public: + /** + * @brief Construct a new CachedData object for operand with param. 
+ * @param [in] base the base address of data + * @param [in] size the size of data + */ CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size} { std::copy(base, base + size, _base); } public: + /** + * @brief Destruct this object + */ ~CachedData() { delete[] _base; } public: + /** + * @brief Get size of data + * @return size of data + */ size_t size(void) const override { return _size; } + /** + * @brief Get the base address of data + * @return the base address of data + */ const uint8_t *base(void) const override { return _base; } private: @@ -115,16 +197,32 @@ private: size_t _size; }; +/** + * @brief Class to have external data of operand. + */ class ExternalData final : public Data { public: + /** + * @brief Construct a new ExternalData object for operand with param. + * @param [in] base the base address of data + * @param [in] size the size of data + */ ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Get size of data + * @return size of data + */ size_t size(void) const override { return _size; } + /** + * @brief Get the base address of data + * @return the base address of data + */ const uint8_t *base(void) const override { return _base; } private: @@ -148,9 +246,19 @@ namespace tflite namespace operand { +/** + * @brief Class to express operand as object. + */ class Object { public: + /** + * @brief Construct a new Object object for operand with param. 
+ * @param [in] shape shape of operand + * @param [in] type type of operand + * @param [in] scale scale of operand + * @param [in] zeroPoint zeroPoint of operand + */ explicit Object(const Shape &shape, const int32_t type, const float scale, const int32_t zeroPoint) : _shape{shape}, _type{type}, _scale{scale}, _zeroPoint{zeroPoint} @@ -159,25 +267,58 @@ public: } public: + /** + * @brief Get shape of operand + * @return Reference of shape of operand + */ const Shape &shape(void) const { return _shape; } + /** + * @brief Get type of operand + * @return type of operand + */ const int32_t type(void) const { return _type; } + /** + * @brief Get scale of operand + * @return scale of operand + */ const float scale(void) const { return _scale; } + /** + * @brief Get zeroPoint of operand + * @return zeroPoint of operand + */ const int32_t zeroPoint(void) const { return _zeroPoint; } private: void data(std::unique_ptr<Data> &&data) { _data = std::move(data); } public: + /** + * @brief Get data of operand + * @return Reference of data of operand + */ const Data &data(void) const { return *_data; } + /** + * @brief Get true if Object has data, otherwise @c false + * @return @c true if Object has data, otherwise @c false + */ bool hasData(void) const { return _data != nullptr; } public: + /** + * @brief Set data of operand with param + * @param [in] args arguments of data to be set + * @return N/A + */ template <typename T, typename... Args> void data(Args &&... 
args) { data(std::unique_ptr<T>(new T{std::forward<Args>(args)...})); } public: + /** + * @brief Get value of data as scalar + * @return value of data as scalar + */ template <typename T> T asScalar(void) const { assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1))); @@ -188,6 +329,11 @@ public: } public: + /** + * @brief Get value of data as ReorderBits + * @param [in] numOfBits The number of bits to be reordered to + * @return value of data as ReorderBits + */ template <typename T> T asReorderBits(size_t numOfBits) const { assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1))); @@ -209,8 +355,6 @@ private: } // namespace tflite } // namespace internal -#include <memory> - namespace internal { namespace tflite @@ -218,9 +362,17 @@ namespace tflite namespace operand { +/** + * @brief Class to have object instances in a kind of set + */ class Set { public: + /** + * @brief Iterate objects with fn + * @param [in] fn function to be iterated + * @return N/A + */ void iterate(const std::function<void(const Index &)> &fn) { for (uint32_t n = 0; n < _objects.size(); ++n) @@ -231,12 +383,35 @@ public: + /** + * @brief Append Object for operand with param + * @param [in] shape shape of operand + * @param [in] type type of operand + * @param [in] scale scale of operand + * @param [in] zeroPoint zeroPoint of operand + * @return Value of Index which has been appended to + */ Index append(const Shape &, int32_t type, float scale, int32_t zeroPoint); public: + /** + * @brief Get Object at Index + * @param [in] index Index to be at + * @return Const reference of Object + */ const Object &at(const Index &) const; + /** + * @brief Get Object at Index + * @param [in] index Index to be at + * @return Reference of Object + */ Object &at(const Index &); + /** + * @brief Get size of operands in Set + * @return Value of size + */ size_t size(void) const { return _objects.size(); } + bool exist(const Index &) const; private: 
std::vector<std::unique_ptr<Object>> _objects; @@ -255,16 +430,36 @@ namespace tflite namespace op { +/** + * @brief Class to have sequence operators. + */ class Sequence { public: + /** + * @brief Construct a new Sequence object for operator as default + */ Sequence() = default; public: + /** + * @brief Get size of operators in Sequence + * @return Value of size + */ uint32_t size(void) const { return _ops.size(); } public: + /** + * @brief Get op::Node at Index + * @param [in] nth index to be at + * @return Reference of op::Node + */ op::Node &at(uint32_t nth) { return *(_ops.at(nth)); } + /** + * @brief Get op::Node at Index + * @param [in] nth index to be at + * @return Const reference of op::Node + */ const op::Node &at(uint32_t nth) const { return *(_ops.at(nth)); } private: @@ -275,6 +470,11 @@ private: } public: + /** + * @brief Add op::Node with param + * @param [in] args arguments of op::Node to be set + * @return Reference of Sequence + */ template <typename T, typename... Args> Sequence &emplace_back(Args &&... 
args) { return emplace_back(std::unique_ptr<T>(new T{std::forward<Args>(args)...})); @@ -293,14 +493,33 @@ namespace internal namespace tflite { +/** + * @brief Class to have operand::Set as operands and op::Sequence as operators + */ class Model { public: + /** + * @brief Get operand::Set + * @return Reference of operand::Set + */ operand::Set &operands(void) { return _operands; } + /** + * @brief Get operand::Set + * @return Const reference of operand::Set + */ const operand::Set &operands(void) const { return _operands; } public: + /** + * @brief Get op::Sequence + * @return Reference of op::Sequence + */ op::Sequence &operations(void) { return _operations; } + /** + * @brief Get op::Sequence + * @return Const reference of op::Sequence + */ const op::Sequence &operations(void) const { return _operations; } private: @@ -309,8 +528,8 @@ private: public: // TODO Hide these fields - std::vector<operand::Index> inputs; - std::vector<operand::Index> outputs; + std::vector<operand::Index> inputs; /**< indexes of operand as input */ + std::vector<operand::Index> outputs; /**< indexes of operand as output */ }; } // namespace tflite diff --git a/runtimes/pure_arm_compute/src/internal/Sink.h b/runtimes/pure_arm_compute/src/internal/Sink.h index af3b37e61..6f44561ea 100644 --- a/runtimes/pure_arm_compute/src/internal/Sink.h +++ b/runtimes/pure_arm_compute/src/internal/Sink.h @@ -14,15 +14,31 @@ * limitations under the License. 
*/ +/** + * @file Sink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Sink struct + */ #ifndef __INTERNAL_SINK_H__ #define __INTERNAL_SINK_H__ #include <arm_compute/core/ITensor.h> +/** + * @brief Struct to get tensor data from arm compute tensor (abstract) + */ struct Sink { + /** + * @brief Destroy the Sink object + */ virtual ~Sink() = default; + /** + * @brief Get tensor data from arm compute tensor + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ virtual void pull(::arm_compute::ITensor &tensor) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/Sinks.h b/runtimes/pure_arm_compute/src/internal/Sinks.h index e8a7d5966..7317c67c1 100644 --- a/runtimes/pure_arm_compute/src/internal/Sinks.h +++ b/runtimes/pure_arm_compute/src/internal/Sinks.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       Sinks.h + * @brief      This file contains TensorSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_SINKS_H__ #define __INTERNAL_SINKS_H__ @@ -28,29 +34,46 @@ #include "internal/nnapi/tensor/View.h" #include "internal/arm_compute/tensor/View.h" -#include "util/tensor/IndexIterator.h" +#include "misc/tensor/IndexIterator.h" +/** + * @brief Class to store NN model output data for general-shaped tensors. + * This is for pulling data to internal tensor from other tensor. 
+ * @tparam T Type of the data elements + */ template <typename T> class TensorSink final : public Sink { public: - TensorSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a TensorSink object + * + * @param[in] shape general-shaped tensor dimensions + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + TensorSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { const ::internal::arm_compute::tensor::View<T> from{&tensor}; ::internal::nnapi::tensor::View<T> into{_shape, _base, _size}; - using ::nnfw::util::tensor::iterate; - using ::nnfw::util::tensor::Index; + using ::nnfw::misc::tensor::iterate; + using ::nnfw::misc::tensor::Index; const uint32_t rank = _shape.rank(); - ::nnfw::util::tensor::iterate(_shape) << [&](const Index &raw) { + ::nnfw::misc::tensor::iterate(_shape) << [&](const Index &raw) { Index permuted(raw.rank()); for (uint32_t axis = 0; axis < rank; ++axis) @@ -64,7 +87,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/Source.h b/runtimes/pure_arm_compute/src/internal/Source.h index a159e5092..fa8f1e811 100644 --- a/runtimes/pure_arm_compute/src/internal/Source.h +++ b/runtimes/pure_arm_compute/src/internal/Source.h @@ -14,15 +14,32 @@ * limitations under the License. */ +/** + * @file Source.h + * @brief This file contains Source struct for pushing ITensor + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_SOURCE_H__ #define __INTERNAL_SOURCE_H__ #include <arm_compute/core/ITensor.h> +/** + * @brief Struct to push inner source to ITensor. 
+ */ struct Source { + /** + * @brief Destructor as default + */ virtual ~Source() = default; + /** + * @brief Push inner source to ITensor + * @param [in] tensor ITensor to be pushed into + * @return N/A + */ virtual void push(::arm_compute::ITensor &tensor) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/Swizzle.h b/runtimes/pure_arm_compute/src/internal/Swizzle.h index 66bf7aef6..f127b8a3b 100644 --- a/runtimes/pure_arm_compute/src/internal/Swizzle.h +++ b/runtimes/pure_arm_compute/src/internal/Swizzle.h @@ -14,28 +14,53 @@ * limitations under the License. */ +/** + * @file Swizzle.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines ARMComputeAxis class and utility functions to support mapping + * between arm compute axis and NNAPI axis + */ #ifndef __SWIZZLE_H__ #define __SWIZZLE_H__ +/** + * @brief Class to represent arm compute axis + */ class ARMComputeAxis { public: + /** + * @brief Construct a new ARMComputeAxis object + */ ARMComputeAxis() = default; public: + /** + * @brief Construct a new ARMComputeAxis object + * @param[in] value Raw axis number + */ explicit ARMComputeAxis(uint32_t value) : _value{value} { // DO NOTHING } public: + /** + * @brief Get raw axis number + * @return Raw axis number + */ uint32_t value(void) const { return _value; } private: uint32_t _value; }; -// Convert T/F Lite / NNAPI axis (based on ...NHWC) to ARMCompute axis (WHCN...) +/** + * @brief Convert T/F Lite / NNAPI axis (based on ...NHWC) to arm compute axis (WHCN...) 
+ * @param[in] rank Rank of shape + * @param[in] axis Axis to map + * @return ARMComputeAxis including arm compute axis info + */ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis) { assert(rank > axis); @@ -68,6 +93,12 @@ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis) #include <cassert> +/** + * @brief Convert bitmask info from NNAPI axis to arm compute axis + * @param[in] in Bitmask data + * @param[in] numOfBits Used bits (rank) + * @return Converted bitmask + */ template <typename T> inline T ReorderBits(T in, size_t numOfBits) { assert(numOfBits > 0); diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h index 20de3b9e8..1e14e2d6c 100644 --- a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h +++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Tensor3DSink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Tensor3DSink class + */ #ifndef __TENSOR3D_SINK_H__ #define __TENSOR3D_SINK_H__ @@ -26,16 +31,30 @@ #include <arm_compute/core/Window.h> #include <arm_compute/core/Helpers.h> +/** + * @brief Class to get tensor data from arm compute tensor + */ template <typename T> class Tensor3DSink final : public Sink { public: - Tensor3DSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a new Tensor3DSink object + * @param[in] shape Shape of tensor + * @param[in] base Pointer to get data + * @param[in] size Size of tensor + */ + Tensor3DSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Get tensor data from arm compute tensor to base + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -60,7 +79,7 @@ 
public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h index c100bbdd2..3d8d1b958 100644 --- a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h +++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Tensor3DSource.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Tensor3DSource class + */ #ifndef __TENSOR3D_SOURCE_H__ #define __TENSOR3D_SOURCE_H__ @@ -26,16 +31,30 @@ #include <arm_compute/core/Window.h> #include <arm_compute/core/Helpers.h> +/** + * @brief Class to push tensor data to arm compute tensor + */ template <typename T> class Tensor3DSource final : public Source { public: - Tensor3DSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new Tensor3DSource object + * @param[in] shape Shape of tensor + * @param[in] base Pointer of tensor data to push + * @param[in] size Size of tensor + */ + Tensor3DSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Push tensor data to arm compute tensor + * @param[out] tensor Tensor object of arm compute to push tensor data + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -60,7 +79,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: const T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/TensorSource.h b/runtimes/pure_arm_compute/src/internal/TensorSource.h index 0ddc44855..114d3588e 100644 --- a/runtimes/pure_arm_compute/src/internal/TensorSource.h +++ b/runtimes/pure_arm_compute/src/internal/TensorSource.h @@ -14,11 +14,17 
@@ * limitations under the License. */ +/** + * @file TensorSource.h + * @brief This file contains TensorSource class which is inherited from Source class + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_TENSOR_SOURCE_H__ #define __INTERNAL_TENSOR_SOURCE_H__ -#include <util/tensor/Shape.h> -#include <util/tensor/IndexIterator.h> +#include <misc/tensor/Shape.h> +#include <misc/tensor/IndexIterator.h> #include "internal/Source.h" #include "internal/Swizzle.h" @@ -26,24 +32,38 @@ #include "internal/arm_compute/tensor/View.h" // NOTE TensorSource is much slower than specialized Source(s) +/** + * @brief Class to define constructor and push function + */ template <typename T> class TensorSource final : public Source { public: - TensorSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new TensorSource object with params + * @param [in] shape Shape of tensor + * @param [in] base Base address + * @param [in] size Size of tensor + */ + TensorSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Function for pushing tensor + * @param [in] tensor Tensor to be pushed + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { const ::internal::nnapi::tensor::Reader<T> from{_shape, _base, _size}; ::internal::arm_compute::tensor::View<T> into{&tensor}; - ::nnfw::util::tensor::iterate(_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) { + ::nnfw::misc::tensor::iterate(_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) { const auto rank = index_nnapi.rank(); - nnfw::util::tensor::Index index_ACL(rank); + nnfw::misc::tensor::Index index_ACL(rank); for (uint32_t axis = 0; axis < rank; ++axis) { @@ -55,7 +75,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; const T *const _base; const size_t _size; }; diff --git 
a/runtimes/pure_arm_compute/src/internal/VectorSink.h b/runtimes/pure_arm_compute/src/internal/VectorSink.h index d1bf962e2..a630ef1c1 100644 --- a/runtimes/pure_arm_compute/src/internal/VectorSink.h +++ b/runtimes/pure_arm_compute/src/internal/VectorSink.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       VectorSink.h + * @brief      This file contains VectorSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_VECTOR_SINK_H__ #define __INTERNAL_VECTOR_SINK_H__ @@ -23,18 +29,31 @@ #include <cassert> -// -// VectorSink -// +/** + * @brief Class to store vector(2D) output data. + * This is for pulling out the data to another tensor. + * @tparam T Type of the data elements + */ template <typename T> class VectorSink final : public Sink { public: + /** + * @brief Construct a VectorSink object + * @param[in] vlen Length of the vector + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ VectorSink(const int32_t vlen, T *base, const size_t size) : _vlen{vlen}, _base{base} { assert(size >= _vlen * sizeof(T)); } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { for (int32_t n = 0; n < _vlen; ++n) diff --git a/runtimes/pure_arm_compute/src/internal/VectorSource.h b/runtimes/pure_arm_compute/src/internal/VectorSource.h index 41aab07e4..48d3d3209 100644 --- a/runtimes/pure_arm_compute/src/internal/VectorSource.h +++ b/runtimes/pure_arm_compute/src/internal/VectorSource.h @@ -14,20 +14,42 @@ * limitations under the License. */ +/** + * @file       VectorSource.h + * @brief      This file contains VectorSource class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_VECTOR_SOURCE_H__ #define __INTERNAL_VECTOR_SOURCE_H__ #include "internal/Source.h" +/** + * @brief Class to store vector(2D) input data. 
+ * This is for push out the data to another tensor. + * @tparam T Type of the data elements + */ template <typename T> class VectorSource final : public Source { public: + /** + * @brief Construct a VectorSource object + * @param[in] vlen Length of the vector + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ VectorSource(const int32_t vlen, const T *base, const size_t size) : _vlen{vlen}, _base{base} { assert(size >= _vlen * sizeof(T)); } public: + /** + * @brief Push the data out to the another tensor + * @param[out] The tensor that output data will be stored + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { for (int32_t n = 0; n < _vlen; ++n) diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute.h b/runtimes/pure_arm_compute/src/internal/arm_compute.h index ef43ed45c..fb6acaf81 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       arm_compute.h + * @brief      This file contains arm_compute library related classes + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_ARM_COMPUTE_H__ #define __INTERNAL_ARM_COMPUTE_H__ @@ -28,6 +34,9 @@ namespace arm_compute namespace operand { +/** + * @brief Class to access the tensor object + */ class Object { public: @@ -40,12 +49,21 @@ public: } public: + /** + * @brief Get the tensor pointer + * @return The tensor pointer + */ ::arm_compute::ITensor *ptr(void) const { return _tensor.get(); } private: std::shared_ptr<::arm_compute::ITensor> _tensor; public: + /** + * @brief Access the tensor object and run the given function + : @param[in] fn The actual behavior when accessing the tensor object + * @return N/A + */ void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const; }; @@ -64,24 +82,48 @@ namespace arm_compute namespace operand { +/** + * @brief Class to manage Object instances + */ class Context { public: + /** + * @brief Set index and tensor pair + * @param[in] ind The operand index + * @param[in] tensor The tensor object + * @return This object reference + */ Context &set(const ::internal::tflite::operand::Index &ind, const std::shared_ptr<::arm_compute::ITensor> &tensor); public: + /** + * @brief Check if the tensor for given index is exist + * @param[in] ind The operand Index + * @return @c true if the entry for ind is exist, otherwise @c false + */ bool exist(const ::internal::tflite::operand::Index &ind) const { return _objects.find(ind.asInt()) != _objects.end(); } public: + /** + * @brief Lookup the tensor with the given index + * @param[in] ind The index as the key + * @return The object const reference + */ const Object &at(const ::internal::tflite::operand::Index &ind) const { return _objects.at(ind.asInt()); } + /** + * @brief Lookup the tensor with the given index + * @param[in] ind The index as the key + * @return The object reference + */ Object &at(const 
::internal::tflite::operand::Index &ind) { return _objects.at(ind.asInt()); } private: @@ -101,19 +143,38 @@ namespace arm_compute namespace op { +/** + * @brief Class to wrap IFunction + */ class Step { public: + /** + * @brief Construct a Step object + * @param[in] func The compiled code to be executed + */ Step(std::unique_ptr<::arm_compute::IFunction> &&func) : _func{std::move(func)} { // DO NOTHING } public: + /** + * @brief Run _func + * @return N/A + */ void run(void) const { _func->run(); } public: + /** + * @brief Get member @c _name + * @return The name as const reference + */ const std::string &name(void) const { return _name; } + /** + * @brief Get member @c _name + * @return The name as reference + */ std::string &name(void) { return _name; } private: @@ -121,7 +182,15 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; #ifdef TFLITE_PROFILING_ENABLED public: + /** + * @brief Get member @c _op_index + * @return The operation index as value + */ int op_idx() const { return _op_idx; } + /** + * @brief Get member @c _op_index + * @return The operation index as reference + */ int &op_idx() { return _op_idx; } private: int _op_idx; @@ -139,12 +208,24 @@ namespace arm_compute namespace op { +/** + * @brief Class managing compiled operation code Sequence + */ class Sequence { public: + /** + * @brief Get size of sequence + * @return Number of sequence steps + */ uint32_t size(void) const { return _functions.size(); } public: + /** + * @brief Append a Function to the sequence + * @param[in] func Function to be appended + * @return This object reference + */ Sequence &append(std::unique_ptr<::arm_compute::IFunction> &&func) { _functions.emplace_back(std::move(func)); @@ -152,7 +233,17 @@ public: } public: + /** + * @brief Get the step entry on the index @c n + * @param[in] n The index + * @return The step object as reference + */ Step &at(uint32_t n) { return _functions.at(n); } + /** + * @brief Get the step entry on the index @c n + * @param[in] n The 
index + * @return The step object as const reference + */ const Step &at(uint32_t n) const { return _functions.at(n); } private: @@ -169,23 +260,50 @@ namespace internal namespace arm_compute { +/** + * @brief Class to manage compiled operation sequence + */ class Plan { public: + /** + * @brief Construct a Plan object + * @param[in] model Model that we want to compile + */ Plan(const std::shared_ptr<const ::internal::tflite::Model> &model) : _model(model) { // DO NOTHING } public: + /** + * @brief Get the model object + * @return The model object as const reference + */ const ::internal::tflite::Model &model(void) const { return *_model; } public: + /** + * @brief Get operand context + * @return The operand context as reference + */ operand::Context &operands(void) { return _operands; } + /** + * @brief Get operand context + * @return The operand context as const reference + */ const operand::Context &operands(void) const { return _operands; } public: + /** + * @brief Get operation sequence + * @return The operation sequence as reference + */ op::Sequence &operations(void) { return _ops; } + /** + * @brief Get operation sequence + * @return The operation sequence as const reference + */ const op::Sequence &operations(void) const { return _ops; } private: @@ -204,7 +322,10 @@ namespace internal namespace arm_compute { -// check if this runtime runs on GPU or NEON +/** + * @brief Check if this runtime runs on GPU or NEON + * @return @c true if GPU mode, otherwise @c false + */ bool isGpuMode(); #define CAST_CL(tensor) static_cast<::arm_compute::CLTensor *>(tensor) diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc new file mode 100644 index 000000000..ff2f79309 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/arm_compute/Cast.h" + +#include "internal/Swizzle.h" + +::arm_compute::Coordinates getARMComputeAxises(uint32_t rank) +{ + ::arm_compute::Coordinates res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + res.set(axis, ToARMComputeAxis(rank, axis).value()); + } + + return res; +} + +::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord, + const ::arm_compute::Coordinates &axises) +{ + ::arm_compute::Coordinates id{}; + assert(runtime_coord.num_dimensions() == axises.num_dimensions()); + for (size_t i = 0; i < runtime_coord.num_dimensions(); ++i) + { + id.set(axises[i], runtime_coord[i]); + } + return id; +} + +// Restructure runtime_permutationVector to ACL_permutationVector +::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank, + const int32_t *runtime_pv) +{ + // rank upto 4 is supported + assert(rank <= 4); + assert(runtime_pv != nullptr); + + int new_pv[4] = {0}; + ::arm_compute::Coordinates axises = getARMComputeAxises(rank); + + if (rank == 4) + { + /** + axises = {3,1,0,2} + NNAPI PermutationVector + N 0 3 + H 1 1 + W 2 0 + C 3 2 + **/ + new_pv[0] = axises[runtime_pv[2]]; + new_pv[1] = axises[runtime_pv[1]]; + new_pv[2] = axises[runtime_pv[3]]; + new_pv[3] = axises[runtime_pv[0]]; + } + else + { + /** + mapping/axises = {rank-1 to 0} + CHW 
--------> WHC + or + WH ----------> HW + **/ + for (int id = 0; id < rank; ++id) + { + new_pv[id] = axises[runtime_pv[rank - id - 1]]; + } + } + + return ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]}; +} + +::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape, + bool apply_dim_correction) +{ + const uint32_t rank = shape.rank(); + + ::arm_compute::TensorShape res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + // NOTE In some cases, in incorrect dimensions is required. + // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of + // LSTM is used as the weight of the FullyConnected. + // The FullyConnected's weight must be greater or equal than 2-dimensions. + // However, if the dimension correction is applied to input_to_input_weights with input_size + // equal to 1, it will be changed to 1-D. + // So input_to_input_weights is not used by the weight of FullyConnected. 
+ res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction); + } + + return res; +} + +::arm_compute::DataType asDataType(const int32_t type) +{ + switch (type) + { + case ANEURALNETWORKS_FLOAT32: + case ANEURALNETWORKS_TENSOR_FLOAT32: + return ::arm_compute::DataType::F32; + case ANEURALNETWORKS_INT32: + case ANEURALNETWORKS_TENSOR_INT32: + return ::arm_compute::DataType::S32; + case ANEURALNETWORKS_UINT32: + return ::arm_compute::DataType::U32; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: + return ::arm_compute::DataType::QASYMM8; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code) +{ + switch (code) + { + case ANEURALNETWORKS_FUSED_NONE: + return ::arm_compute::ActivationLayerInfo{}; + case ANEURALNETWORKS_FUSED_RELU: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + case ANEURALNETWORKS_FUSED_RELU1: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + case ANEURALNETWORKS_FUSED_RELU6: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset) +{ + return ::arm_compute::QuantizationInfo(scale, offset); +} + +::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type, + const float scale, const int32_t zeroPoint) +{ + return ::arm_compute::TensorInfo(shape, 1, asDataType(type), + asQuantizationInfo(scale, zeroPoint)); +} diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h index e2ceb8fef..42b547feb 100644 --- 
a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h @@ -14,104 +14,98 @@ * limitations under the License. */ +/** + * @file Cast.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines casting functions from internal object to arm compute object + */ #ifndef __ARM_COMPUTE_CAST_H__ +#define __ARM_COMPUTE_CAST_H__ +#include <arm_compute/core/Coordinates.h> +#include <arm_compute/core/TensorInfo.h> #include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Types.h> -#include "internal/Swizzle.h" -#include "internal/Model.h" - -inline ::arm_compute::Coordinates getARMComputeAxises(uint32_t rank) -{ - ::arm_compute::Coordinates res{}; - - res.set_num_dimensions(rank); - - for (uint32_t axis = 0; axis < rank; ++axis) - { - res.set(axis, ToARMComputeAxis(rank, axis).value()); - } - - return res; -} - -inline ::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape, - bool apply_dim_correction = true) -{ - const uint32_t rank = shape.rank(); +#include <NeuralNetworks.h> - ::arm_compute::TensorShape res{}; - - res.set_num_dimensions(rank); - - for (uint32_t axis = 0; axis < rank; ++axis) - { - // NOTE In some cases, in incorrect dimensions is required. - // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of - // LSTM is used as the weight of the FullyConnected. - // The FullyConnected's weight must be greater or equal than 2-dimensions. - // However, if the dimension correction is applied to input_to_input_weights with input_size - // equal to 1, it will be changed to 1-D. - // So input_to_input_weights is not used by the weight of FullyConnected. 
- res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction); - } - - return res; -} +#include "internal/Model.h" -::arm_compute::DataType asDataType(const int32_t type) -{ - switch (type) - { - case ANEURALNETWORKS_FLOAT32: - case ANEURALNETWORKS_TENSOR_FLOAT32: - return ::arm_compute::DataType::F32; - case ANEURALNETWORKS_INT32: - case ANEURALNETWORKS_TENSOR_INT32: - return ::arm_compute::DataType::S32; - case ANEURALNETWORKS_UINT32: - return ::arm_compute::DataType::U32; - case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: - return ::arm_compute::DataType::QASYMM8; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} +/** + * @brief Generate arm compute coordinate object from rank + * @param[in] rank Rank number + * @return Coordinate object + */ +::arm_compute::Coordinates getARMComputeAxises(uint32_t rank); + +/** + * @brief Generate arm compute coordinate object from runtime coordinate object + * @param[in] runtime_coord Runtime coordinates object + * @param[in] axises Coordinates for axises to map runtime-coordinates to + * arm_compute-coordinates + * @return Arm_compute coordinate object + */ +::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord, + const ::arm_compute::Coordinates &axises); + +/** +* @brief Generate arm compute permutation vector from runtime permutation vector +* @param[in] rank Rank number supported upto 4 +* @param[in] runtime_pv Integer array for runtime permutation vector +* @return Permutation vector of arm compute +*/ +::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank, + const int32_t *runtime_pv); +/** + * @brief Cast from shape of internal to arm compute + * @param[in] shape Internal shape object + * @param[in] apply_dim_correction Flag to state whether apply dimension correction after setting + * one dimension in arm compute + * @return TensorShape object of arm compute + */ +::arm_compute::TensorShape 
asTensorShape(const internal::tflite::operand::Shape &shape, + bool apply_dim_correction = true); -::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code) -{ - switch (code) - { - case ANEURALNETWORKS_FUSED_NONE: - return ::arm_compute::ActivationLayerInfo{}; - case ANEURALNETWORKS_FUSED_RELU: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - case ANEURALNETWORKS_FUSED_RELU1: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - case ANEURALNETWORKS_FUSED_RELU6: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} +/** + * @brief Cast from data type enum of NNAPI to arm compute + * @param[in] type NNAPI data type + * @return Data type of arm compute + */ +::arm_compute::DataType asDataType(const int32_t type); -::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset) -{ - return ::arm_compute::QuantizationInfo(scale, offset); -} +/** + * @brief Cast from NNAPI activation type enum to activation object of arm compute + * @param[in] code NNAPI activation type + * @return ActivationLayerInfo object of arm compute + */ +::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code); +/** + * @brief Generate quantization info object of arm compute + * @param[in] scale Scale of quantization + * @param[in] offset Offset of quantization + * @return QuantizationInfo object of arm compute + */ +::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset); + +/** + * @brief Cast from internal tensor info to tensor info object of arm compute + * @param[in] shape Tensor shape + * @param[in] type Tensor type + * @param[in] scale Scale of tensor quantization + * @param[in] zeroPoint Zeropoint of tensor 
quantization + * @return TensorInfo object of arm compute + */ ::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type, - const float scale = 0.0f, const int32_t zeroPoint = 0) -{ - return ::arm_compute::TensorInfo(shape, 1, asDataType(type), - asQuantizationInfo(scale, zeroPoint)); -} - + const float scale = 0.0f, const int32_t zeroPoint = 0); + +/** + * @brief Set value to arm compute tensor with casting + * @param[in] value Value to set + * @param[out] to Target tensor of arm compute + * @param[in] id Position of element + * @return N/A + */ template <typename FromT> void copyCast(const FromT value, ::arm_compute::ITensor *to, const ::arm_compute::Coordinates &id) { diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h index 9d19021ae..c989ef4c2 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h @@ -14,10 +14,15 @@ * limitations under the License. 
*/ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::feature::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__ -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" #include <arm_compute/core/ITensor.h> @@ -28,15 +33,29 @@ namespace arm_compute namespace feature { -template <typename T> class View final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to access feature's element + */ +template <typename T> class View final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Feature to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in 3D feature using channel, row and column + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = feature_index_to_byte_offset(ch, row, col); @@ -46,6 +65,14 @@ public: return *ptr; } + /** + * @brief Get value of element in 4D feature using batch, channel, row and column + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); @@ -56,6 +83,13 @@ public: } public: + /** + * @brief Get reference of element in 3D feature using channel, row and column + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t ch, uint32_t row, uint32_t col) { const auto offset = feature_index_to_byte_offset(ch, row, col); @@ -65,6 +99,14 @@ public: return *ptr; } + /** + * 
@brief Get reference of element in 4D feature using batch, channel, row and column + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); @@ -75,12 +117,27 @@ public: } private: + /** + * @brief Get offset of element in 3D feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t feature_index_to_byte_offset(uint32_t ch, uint32_t row, uint32_t col) const { // ARM Compute uses CHW ordering return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch}); } + /** + * @brief Get offset of element in 4D feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const { // ARM Compute uses CHW ordering diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h index 28054d7c8..399cdf913 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h @@ -14,11 +14,16 @@ * limitations under the License. 
*/ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internel::arm_compute::kernel::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__ -#include "util/kernel/Shape.h" -#include "util/kernel/Reader.h" +#include "misc/kernel/Shape.h" +#include "misc/kernel/Reader.h" #include <arm_compute/core/ITensor.h> @@ -29,15 +34,30 @@ namespace arm_compute namespace kernel { -template <typename T> class View final : public nnfw::util::kernel::Reader<T> +/** + * @brief Class to access kernel's element + */ +template <typename T> class View final : public nnfw::misc::kernel::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Kernel to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = kernel_index_to_byte_offset(nth, ch, row, col); @@ -48,6 +68,14 @@ public: } public: + /** + * @brief Get reference of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { const auto offset = kernel_index_to_byte_offset(nth, ch, row, col); @@ -58,6 +86,14 @@ public: } private: + /** + * @brief Get offset of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t kernel_index_to_byte_offset(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const { return 
_tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch, nth}); diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h index e3534294f..305fff729 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::matrix::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__ -#include "util/matrix/Shape.h" -#include "util/matrix/Reader.h" +#include "misc/matrix/Shape.h" +#include "misc/matrix/Reader.h" #include <arm_compute/core/ITensor.h> @@ -29,15 +34,28 @@ namespace arm_compute namespace matrix { -template <typename T> class View final : public nnfw::util::matrix::Reader<T> +/** + * @brief Class to access matrix's element + */ +template <typename T> class View final : public nnfw::misc::matrix::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Matrix to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t row, uint32_t col) const override { const auto offset = matrix_index_to_byte_offset(row, col); @@ -48,6 +66,12 @@ public: } public: + /** + * @brief Get reference of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Refence of element + */ T &at(uint32_t row, uint32_t col) { const auto offset = matrix_index_to_byte_offset(row, col); @@ -58,6 +82,12 @@ public: } private: + /** + * @brief Get offset of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * 
@return Offset of element + */ size_t matrix_index_to_byte_offset(uint32_t row, uint32_t col) const { return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row}); diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h index 0d8f2ab81..372bd682d 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::tensor::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__ -#include "util/tensor/Shape.h" -#include "util/tensor/Index.h" +#include "misc/tensor/Shape.h" +#include "misc/tensor/Index.h" #include <arm_compute/core/ITensor.h> @@ -29,16 +34,28 @@ namespace arm_compute namespace tensor { +/** + * @brief Class to access tensor's element + */ template <typename T> class View { public: + /** + * @brief Construct a new View object + * @param[in] tensor Tensor to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } private: - uint32_t byte_offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Get offset of element in tensor + * @param[in] index Index of element + * @return Offset of element + */ + uint32_t byte_offset_of(const nnfw::misc::tensor::Index &index) const { // NOTE index.rank() >= _tensor->info()->num_dimensions() should hold here const uint32_t rank = index.rank(); @@ -56,7 +73,12 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get value of element in tensor + * @param[in] index Index of element + * @return Value of element + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = byte_offset_of(index); @@ -65,7 +87,12 @@ 
public: return *ptr; } - T &at(const nnfw::util::tensor::Index &index) + /** + * @brief Get reference of element in tensor + * @param[in] index Index of element + * @return Reference of element + */ + T &at(const nnfw::misc::tensor::Index &index) { const auto offset = byte_offset_of(index); diff --git a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h index 502a1ee0e..83ae7c17b 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       FeatureLoggingLayer.h + * @brief      This file contains FeatureLoggingLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __FEATURE_LOGGING_LAYER_H__ #define __FEATURE_LOGGING_LAYER_H__ @@ -27,9 +33,24 @@ #include "internal/arm_compute.h" +/** + * @brief Class to run FeatureLogging Layer + */ class FeatureLoggingLayer : public ::arm_compute::IFunction { public: + FeatureLoggingLayer(void) : _tag(""), _target(nullptr) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] tag Text tag for this layer + * @param[in] target The feature tensor to be printed + * @return N/A + */ void configure(const std::string &tag, ::arm_compute::ITensor *target) { _tag = tag; @@ -37,6 +58,10 @@ public: } public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override { if (::internal::arm_compute::isGpuMode()) diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc index 311284efc..28789a801 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc @@ -17,8 +17,6 @@ #include "GenericFullyConnectedLayer.h" #include "internal/arm_compute.h" -#include <arm_compute/core/Helpers.h> - void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, ::arm_compute::ITensor *biases, @@ -56,9 +54,9 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, { // reshape auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _generic_reshape.configure(CAST_NE(_input), &_neon_buffer); + _generic_reshape.configure(_input, &_neon_buffer); - _neon_fc.configure(&_neon_buffer, CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output)); + _neon_fc.configure(&_neon_buffer, _weights, _biases, _output); // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate // here. @@ -66,7 +64,7 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, } else { - _neon_fc.configure(CAST_NE(_input), CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output)); + _neon_fc.configure(_input, _weights, _biases, _output); } } } diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h index 55d8683da..f1519f54d 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h @@ -14,23 +14,52 @@ * limitations under the License. 
*/ +/** + * @file       GenericFullyConnectedLayer.h + * @brief      This file contains GenericFullyConnectedLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __GENERIC_FULLY_CONNECTED_LAYER_H__ #define __GENERIC_FULLY_CONNECTED_LAYER_H__ -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/CL/CLTensor.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> #include "internal/layers/GenericReshapeLayer.h" +/** + * @brief Class to run FullyConnected Layer with both CPU and GPU + */ class GenericFullyConnectedLayer : public ::arm_compute::IFunction { public: + GenericFullyConnectedLayer(void) + : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, + _neon_buffer{}, _cl_fc{}, _neon_fc{}, _generic_reshape{}, _needs_reshape(false) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] input The source tensor + * @param[in] weights The tensor that is filled with weight values + * @param[in] biases The tensor that is filled with biase values + * @param[in] output The destination tensor + * @param[in] needs_reshape Whether it needs to be reshaped or not + * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. + * @return N/A + */ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, ::arm_compute::ITensor *biases, ::arm_compute::ITensor *output, bool needs_reshape, ::arm_compute::TensorShape reshape); public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc index 2cdfe1b6e..c38c2e9e3 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc @@ -43,8 +43,8 @@ void GenericReshapeLayer::configure(::arm_compute::ITensor *input, ::arm_compute } else { - _neon_permute.configure(CAST_NE(input), &_neon_permuted, pv); - _neon_reshape.configure(&_neon_permuted, CAST_NE(output)); + _neon_permute.configure(input, &_neon_permuted, pv); + _neon_reshape.configure(&_neon_permuted, output); // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here. _neon_permuted.allocator()->allocate(); diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h index 1def21085..a22c14c8b 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       GenericReshapeLayer.h + * @brief      This file contains GenericReshapeLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __GENERIC_RESHAPE_LAYER_H__ #define __GENERIC_RESHAPE_LAYER_H__ @@ -25,12 +31,33 @@ #include <arm_compute/runtime/NEON/functions/NEPermute.h> #include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> +/** + * @brief Class to run Reshape Layer with both CPU and GPU + */ class GenericReshapeLayer : public ::arm_compute::IFunction { public: + GenericReshapeLayer(void) + : _input(nullptr), _output(nullptr), _cl_permuted{}, _neon_permuted{}, _cl_permute{}, + _cl_reshape{}, _neon_permute{}, _neon_reshape{} + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] input The source tensor + * @param[in] output The destination tensor + * @return N/A + */ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); public: + /** + * @brief Run the operation. Must be called after configure(). + * @return N/A + */ void run(void) override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc deleted file mode 100644 index 4a5370587..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc +++ /dev/null @@ -1,78 +0,0 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include "PadLayer.h"
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-void PadLayer::configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width)
-{
- _input = input;
- _output = output;
- _border_width = border_width;
- _output_height = _output->info()->dimension(0);
- _output_width = _output->info()->dimension(1);
-
- uint8_t constant_border_value = 0;
- ::arm_compute::PixelValue constant_pixel_value = ::arm_compute::PixelValue(constant_border_value);
-
- unsigned int padding_size = _border_width;
- input->info()->extend_padding(::arm_compute::PaddingSize{padding_size});
- _fillborderkernel.configure(input, _border_width, ::arm_compute::BorderMode::CONSTANT,
- constant_pixel_value);
-}
-
-void PadLayer::run(void)
-{
- _fillborderkernel.run();
-
- ::arm_compute::Coordinates coordinates =
- ::arm_compute::Coordinates(-_border_width, -_border_width);
- ::arm_compute::TensorShape new_tensor_shape =
- ::arm_compute::TensorShape(_output_height, _output_width);
-
- /* NOTE: The cl kernel fills the data in the borders(not in the tensor).
- Once the tensor is received back at NNAPI, we are adjusting
- the valid region in such a way that the padding becomes part of the tensor itself
- and matches the size of output. */
- _input->info()->set_valid_region(::arm_compute::ValidRegion(coordinates, new_tensor_shape));
-
- /* NOTE: Since cl kernel does not have an argument for output tensor while NNAPI does.
- We need to map the input (tensor that is passed to the cl kernel) back to
- output. */
-
- // TODO: Write a modified CLCopy kernel to do this job.
- populateOutput();
-}
-
-void PadLayer::populateOutput()
-{
- auto &queue = ::arm_compute::CLScheduler::get().queue();
- _input->map(queue);
- _output->map(queue);
-
- auto input_tensor = static_cast<::arm_compute::ITensor *>(_input);
- auto const source_data = input_tensor->buffer();
-
- auto output_tensor = static_cast<::arm_compute::ITensor *>(_output);
- auto dst_data = output_tensor->buffer();
-
- memmove(dst_data, source_data, _output_height * _output_width * 4);
-
- _input->unmap(queue);
- _output->unmap(queue);
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc new file mode 100644 index 000000000..6d348e814 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleArgMinMax.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleArgMinMax::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + std::vector<uint32_t> axis, ::arm_compute::ArgOperation op) +{ + _input = input; + _output = output; + _axis = axis; + _input_rank = input->info()->num_dimensions(); + _op_type = op; +} + +inline const ::arm_compute::TensorShape +inferOutputShape(const ::arm_compute::TensorShape &input_shape, const std::vector<uint32_t> &axis, + int input_rank) +{ + ::arm_compute::TensorShape out_shape{}; + size_t dim = 1; + for (int i = 0; i < input_rank; ++i) + { + dim = input_shape[i]; + out_shape.set(i, dim); + } + + for (int i = 0; i < axis.size(); ++i) + { + out_shape.set(axis[i], 1); + } + + return out_shape; +} + +template <typename T> +inline T getArgMinMaxEle(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::TensorShape &output_shape, const size_t b, + const size_t d, const size_t h, const 
size_t w, const int axis, + const ::arm_compute::ArgOperation op_type) +{ + // If output[dimention] == 1, will check all values of that dimension because of reducing + // dimension. + // Else will check only one value. + const size_t start_b = output_shape[3] == 1 ? 0 : b; + const size_t start_d = output_shape[2] == 1 ? 0 : d; + const size_t start_h = output_shape[1] == 1 ? 0 : h; + const size_t start_w = output_shape[0] == 1 ? 0 : w; + const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b; + const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d; + const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h; + const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w; + + ::arm_compute::Coordinates id{w, h, d, b}; + ::arm_compute::Coordinates min_max_id{w, h, d, b}; + + T value = *reinterpret_cast<T *>(input->ptr_to_element(id)); + T tval = *reinterpret_cast<T *>(input->ptr_to_element(id)); + + for (size_t in_b = start_b; in_b <= stop_b; ++in_b) + { + id.set(3, in_b); + for (size_t in_d = start_d; in_d <= stop_d; ++in_d) + { + id.set(2, in_d); + for (size_t in_h = start_h; in_h <= stop_h; ++in_h) + { + id.set(1, in_h); + for (size_t in_w = start_w; in_w <= stop_w; ++in_w) + { + id.set(0, in_w); + if (op_type == ::arm_compute::ArgOperation::MIN) + { + value = std::min<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); + } + else if (op_type == ::arm_compute::ArgOperation::MAX) + { + value = std::max<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); + } + else + throw std::runtime_error("This Arg operation is not supported, yet"); + + if (tval != value) + { + min_max_id = id; + tval = value; + } + } + } + } + } + + return min_max_id[axis]; +} + +template <typename T> +inline void +getArgMinMax(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::TensorShape &output_shape, ::arm_compute::ITensor *output, + const int axis, const 
::arm_compute::ArgOperation op_type) +{ + ::arm_compute::Coordinates id; + for (size_t out_b = 0; out_b < output_shape[3]; ++out_b) + { + id.set(3, out_b); + for (size_t out_d = 0; out_d < output_shape[2]; ++out_d) + { + id.set(2, out_d); + for (size_t out_h = 0; out_h < output_shape[1]; ++out_h) + { + id.set(1, out_h); + for (size_t out_w = 0; out_w < output_shape[0]; ++out_w) + { + id.set(0, out_w); + *reinterpret_cast<int *>(output->ptr_to_element(id)) = getArgMinMaxEle<T>( + input, input_shape, output_shape, out_b, out_d, out_h, out_w, axis, op_type); + } + } + } + } +} + +void SimpleArgMinMax::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + ::arm_compute::TensorShape input_shape = _input->info()->tensor_shape(); + + // Axis dimension is 1 and size is 1. + // TODO support axis size > 1. + int axis_val = _axis[0]; + ::arm_compute::TensorShape output_shape = inferOutputShape(input_shape, _axis, _input_rank); + + _output->info()->set_tensor_shape(output_shape); + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::QASYMM8: + getArgMinMax<uint8_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + case ::arm_compute::DataType::S32: + getArgMinMax<int32_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + case ::arm_compute::DataType::F32: + getArgMinMax<float>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + _output->info()->set_tensor_shape(output_shape); + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h new 
file mode 100644 index 000000000..b90e74579 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_ARG_MIN_MAX_H__ +#define __SIMPLE_ARG_MIN_MAX_H__ + +#include "internal/arm_compute.h" +#include "arm_compute/core/TypesEx.h" + +class SimpleArgMinMax : public ::arm_compute::IFunction +{ +public: + SimpleArgMinMax(void) : _input(nullptr), _output(nullptr), _axis(), _input_rank(0) + { + // DO NOTHING + } + +public: + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] axis Dimension along which to find Min or Max Index. 
+ */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + std::vector<uint32_t> axis, ::arm_compute::ArgOperation _op_type); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + std::vector<uint32_t> _axis; + int _input_rank; + ::arm_compute::ArgOperation _op_type; +}; + +#endif /*__SIMPLE_ARG_MIN_MAX_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h index 31c927b4f..aed9ae286 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h @@ -14,15 +14,36 @@ * limitations under the License. */ +/** + * @file       SimpleArithmeticAddition.h + * @brief      This file contains SimpleArithmeticAddition class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __SIMPLE_ARITHMETIC_ADDITION_H__ #define __SIMPLE_ARITHMETIC_ADDITION_H__ #include "internal/arm_compute.h" #include <arm_compute/core/ITensor.h> +/** + * @brief Class to run SimpleArithmeticAddition Layer + */ class SimpleArithmeticAddition : public ::arm_compute::IFunction { public: + SimpleArithmeticAddition(void) : _lhs(nullptr), _rhs(nullptr), _out(nullptr) + { + // DO NOTHING + } + + /** + * @brief Configure the layer + * @param[in] lhs Lefthand-side operand + * @param[in] rhs Righthand-side operand + * @param[in] out The destination tensor(Result operand) + * @return N/A + */ void configure(::arm_compute::ITensor *lhs, ::arm_compute::ITensor *rhs, ::arm_compute::ITensor *out) { @@ -32,6 +53,10 @@ public: } public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override { if (::internal::arm_compute::isGpuMode()) diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc new file mode 100644 index 000000000..87175ee1a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleBatchToSpaceNd.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + const int32_t *block_size, + const ::arm_compute::Coordinates &axises) +{ + const auto rank = axises.num_dimensions(); + assert(rank == 4); + + for (int i = 0; i < rank; ++i) + assert(axises[i] >= 0 && axises[i] < rank); + + _input = input; + _output = output; + _block_size = block_size; + _axises = axises; +} + +template <typename T> +inline void BatchToSpaceND(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, + const int32_t *block_size_data, ::arm_compute::ITensor *output, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) +{ + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int depth = output_shape[axises[3]]; + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < depth; ++out_d) + { + const int in_d = out_d; + const int in_h = out_h / block_size_data[0]; + const int in_w = out_w / block_size_data[1]; + const int in_b = + out_b + + ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) * + output_batch; + + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } +} +void SimpleBatchToSpaceND::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = 
::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + BatchToSpaceND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::F32: + BatchToSpaceND<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h new file mode 100644 index 000000000..5695d9719 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h @@ -0,0 +1,51 @@ +/* + *Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__ +#define __SIMPLE_BATCH_TO_SPACE_ND_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimpleBatchToSpaceND : public ::arm_compute::IFunction +{ +public: + SimpleBatchToSpaceND(void) : _input(nullptr), _output(nullptr), _block_size(nullptr), _axises{} + { + // DO NOTHING + } + + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] block_size Block size. + * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + const int32_t *block_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + const int32_t *_block_size; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__SIMPLE_BATCH_TO_SPACE_ND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc new file mode 100644 index 000000000..7c7706a78 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/layers/SimpleCastLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleCastLayer::castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, + const arm_compute::Coordinates &id) +{ + switch (in->info()->data_type()) + { + case ::arm_compute::DataType::F32: + { + copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::S32: + { + copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::U32: + { + copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::QASYMM8: + { + const uint8_t quantizedValue = *(in->ptr_to_element(id)); + copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id); + break; + } + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +void SimpleCastLayer::configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out) +{ + _in = in; + _out = out; +} + +void SimpleCastLayer::run(void) +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + CAST_CL(_in)->map(q); + CAST_CL(_out)->map(q); + } + + arm_compute::Window window; + window.use_tensor_dimensions(_out->info()->tensor_shape()); + + execute_window_loop(window, + [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); }); + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + CAST_CL(_out)->unmap(q); + CAST_CL(_in)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h index fa3006438..f9a48b481 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h @@ -14,80 +14,55 @@ * limitations under the License. 
*/ +/** + * @file       SimpleCastLayer.h + * @brief      This file contains SimpleCastLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __SIMPLE_CAST_LAYER_H__ #define __SIMPLE_CAST_LAYER_H__ -#include <arm_compute/core/ITensor.h> - #include "internal/arm_compute.h" -#include "internal/op/Cast.h" +#include "internal/arm_compute/Cast.h" +/** + * @brief Class to run SimpleCast Layer + */ class SimpleCastLayer : public ::arm_compute::IFunction { public: - void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out) + SimpleCastLayer(void) : _in(nullptr), _out(nullptr) { - _in = in; - _out = out; + // DO NOTHING } -public: - void run(void) override - { - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_in)->map(q); - CAST_CL(_out)->map(q); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_out->info()->tensor_shape()); + /** + * @brief Configure the layer + * @param[in] in The source tensor + * @param[in] out The destination tensor + * @return N/A + */ + void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out); - execute_window_loop(window, - [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); }); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_out)->unmap(q); - CAST_CL(_in)->unmap(q); - } - } + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ + void run(void) override; +private: + /** + * @brief Cast and copy data from one tensor to another + * + * @param[in] in The source tensor + * @param[out] out The destination tensor + * @param[in] id Coordinates to copy + * @return N/A + */ void castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, - const arm_compute::Coordinates &id) - { - switch (in->info()->data_type()) - { - case ::arm_compute::DataType::F32: - { - copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::S32: - { - copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::U32: - { - copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::QASYMM8: - { - const uint8_t quantizedValue = *(in->ptr_to_element(id)); - copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id); - break; - } - default: - throw std::runtime_error("Not supported, yet"); - break; - } - } + const arm_compute::Coordinates &id); -private: ::arm_compute::ITensor *_in; ::arm_compute::ITensor *_out; }; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc new file mode 100644 index 000000000..d62a8321b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleDepthToSpace.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + int32_t block_size, const ::arm_compute::Coordinates &axises) +{ + const auto rank = axises.num_dimensions(); + assert(rank == 4); + for (int i = 0; i < rank; ++i) + { + assert(axises[i] >= 0); + assert(axises[i] < rank); + } + + _input = input; + _output = output; + _block_size = block_size; + _axises = axises; +} + +template <typename T> +inline void DepthToSpace(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) +{ + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + const int in_b = out_b; + const int in_h = out_h / block_size; + const int in_w = out_w / block_size; + const int in_d = + out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; + + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto 
output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } +} + +void SimpleDepthToSpace::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::F32: + DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h new file mode 100644 index 000000000..1032aaa47 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_DEPTH_TO_SPACE_H__ +#define __SIMPLE_DEPTH_TO_SPACE_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimpleDepthToSpace : public ::arm_compute::IFunction +{ +public: + SimpleDepthToSpace(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{} + { + // DO NOTHING + } + +public: + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] block_size Block size. + * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + int32_t _block_size; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc index 089c783c1..ae740bb10 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc @@ -1,3 +1,18 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "internal/layers/SimpleEmbeddingLookup.h" #include <arm_compute/runtime/CL/CLScheduler.h> @@ -6,7 +21,8 @@ void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values, ::arm_compute::ITensor *output) { - // Assume that verification of operands are already done at Planner::visit() + assert(values->info()->num_dimensions() == output->info()->num_dimensions()); + assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4); _lookups = lookups; _values = values; _output = output; @@ -25,85 +41,62 @@ void SimpleEmbeddingLookup::run() // type of elements of lookups is always integer const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); - const auto values_buf = _values->buffer(); - auto output_buf = _output->buffer(); const auto lookups_info = _lookups->info(); const auto values_info = _values->info(); const auto output_info = _output->info(); - // TODO Refactor below duplicated code! - const auto values_rank = values_info->num_dimensions(); - switch (values_rank) + // NOTE The first dimension's position is always at the end of dimensions. 
+ const auto first_dim_pos = values_info->num_dimensions() - 1; + + const size_t first_dim = values_info->dimension(first_dim_pos); + for (size_t i = 0; i < lookups_info->dimension(0); ++i) { - case 2: - // (H,W) in nnapi -> (W,H) in acl - { - const size_t row_size = values_info->dimension(1); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 3: - // (B,H,W) in nnapi -> (W,H,B) in acl - { - const size_t row_size = values_info->dimension(2); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 4: - // (N,H,W,C) in nnapi -> (N,C,H,W) in acl - { - const size_t row_size = values_info->dimension(3); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index 
out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, 0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, 0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 1: - // In this case, shape of values actually is matrix but the height(row size) is 1 in acl. If - // row size is 1, this op is not needed and it means this situtation could be wrong. - throw std::runtime_error("Wrong usage of EmbeddingLookup op!"); - default: - throw std::runtime_error("Not supported rank!"); + if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim) + throw std::runtime_error("Embedding Lookup: index out of bounds."); } + // If each strides of values and output are different, applied padding size of the two tensors are + // different, therefore, it can not be copied at once. 
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+
+    return true;
+  };
+
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+  }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const ::arm_compute::Coordinates &id) {
+                        ::arm_compute::Coordinates values_id = id;
+                        const int idx = id[first_dim_pos];
+                        values_id.set(first_dim_pos, lookups_buf[idx]);
+                        memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                      },
+                      it);
+
   if (::internal::arm_compute::isGpuMode())
   {
     auto &q = ::arm_compute::CLScheduler::get().queue();
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
index 9f2cd977f..fd499437f 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
@@ -1,16 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef __SIMPLE_EMBEDDING_LOOKUP_H__ #define __SIMPLE_EMBEDDING_LOOKUP_H__ #include "internal/arm_compute.h" -#include <arm_compute/core/ITensor.h> -#include <arm_compute/runtime/IFunction.h> +/** + * @file       SimpleEmbeddingLookup.h + * @brief      This file contains SimpleEmbeddingLookup class + * @ingroup    COM_AI_RUNTIME + */ + +/** + * @brief Class to run SimpleEmbeddingLookup Layer + */ class SimpleEmbeddingLookup : public ::arm_compute::IFunction { public: + SimpleEmbeddingLookup(void) : _lookups(nullptr), _values(nullptr), _output(nullptr) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] lookups 1D tensor which contains lookup values + * @param[in] values The source tensor + * @param[in] output The destination tensor + * @return N/A + */ void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values, ::arm_compute::ITensor *output); + /** + * @brief Run the operation. Must be called after configure(). + * @return N/A + */ void run() override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc new file mode 100644 index 000000000..7f8ae2505 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleHashtableLookupLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups, + ::arm_compute::ITensor *keys, + ::arm_compute::ITensor *values, + ::arm_compute::ITensor *output, + ::arm_compute::ITensor *hits) +{ + _lookups = lookups; + _keys = keys; + _values = values; + _output = output; + _hits = hits; + _lookup_indices.resize(lookups->info()->dimension(0), -1); +} + +void SimpleHashtableLookupLayer::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_lookups)->map(queue); + CAST_CL(_keys)->map(queue); + CAST_CL(_values)->map(queue); + CAST_CL(_output)->map(queue); + CAST_CL(_hits)->map(queue); + } + + const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); + const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer()); + uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); + + const auto lookups_info = _lookups->info(); + const auto values_info = _values->info(); + const auto keys_info = _keys->info(); + const auto output_info = _output->info(); + + // NOTE The first dimension's position must be always at the end of dimensions. 
+  const auto first_dim_pos = values_info->num_dimensions() - 1;
+  const size_t first_dim = values_info->dimension(first_dim_pos);
+
+  std::map<int32_t, size_t> key_map;
+  const int keys_num = keys_info->dimension(0);
+  for (size_t key_index = 0; key_index < keys_num; key_index++)
+  {
+    key_map[keys_buf[key_index]] = key_index;
+  }
+
+  const int lookups_num = lookups_info->dimension(0);
+  for (size_t i = 0; i < lookups_num; ++i)
+  {
+    const auto lookup_value = lookups_buf[i];
+    const auto it = key_map.find(lookup_value);
+    if (it != key_map.end())
+    {
+      if (it->second >= first_dim)
+        throw std::runtime_error("HashTable Lookup: index out of bounds.");
+      _lookup_indices[i] = it->second;
+    }
+  }
+
+  // If each strides of values and output are different, applied padding size of the two tensors are
+  // different, therefore, it can not be copied at once.
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+
+    return true;
+  };
+
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+  using ::arm_compute::Coordinates;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+  }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const Coordinates &id) {
+                        Coordinates values_id = id;
+                        const int idx = id[first_dim_pos];
+                        const int lookup_index = _lookup_indices[idx];
+                        if (lookup_index >= 0)
+                        {
+                          values_id.set(first_dim_pos, lookup_index);
+                          memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                          hits_buf[idx] = 1;
+                        }
+                        else
+                        {
+                          memset(it.ptr(), 0, copy_bytes);
+                          hits_buf[idx] = 0;
+                        }
+                      },
+                      it);
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_lookups)->unmap(queue);
+    CAST_CL(_keys)->unmap(queue);
+    CAST_CL(_values)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+    CAST_CL(_hits)->unmap(queue);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
new file mode 100644
index 000000000..ba9d2ec0d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __SIMPLE_HASHTABLE_LOOKUP_H__ +#define __SIMPLE_HASHTABLE_LOOKUP_H__ + +#include "internal/arm_compute.h" + +class SimpleHashtableLookupLayer : public ::arm_compute::IFunction +{ +public: + SimpleHashtableLookupLayer(void) + : _lookups(nullptr), _keys(nullptr), _values(nullptr), _output(nullptr), _hits(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys, + ::arm_compute::ITensor *values, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *hits); + + void run() override; + +private: + ::arm_compute::ITensor *_lookups; + ::arm_compute::ITensor *_keys; + ::arm_compute::ITensor *_values; + ::arm_compute::ITensor *_output; + ::arm_compute::ITensor *_hits; + std::vector<int32_t> _lookup_indices; +}; + +#endif /*__SIMPLE_HASHTABLE_LOOKUP_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc new file mode 100644 index 000000000..d3943ad40 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleNeg.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleNeg::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) +{ + _input = input; + _output = output; +} + +void SimpleNeg::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->map(queue); + CAST_CL(_output)->map(queue); + } + + arm_compute::Window window; + window.use_tensor_dimensions(_output->info()->tensor_shape()); + + execute_window_loop(window, [this](const arm_compute::Coordinates &id) { + // NOTE Must be two input tensors of identical type + // Must be output tensor of the same type as input0. + assert(_input->info()->data_type() == _output->info()->data_type()); + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::F32: + { + const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id)); + *reinterpret_cast<float *>(_output->ptr_to_element(id)) = -input_value; + break; + } + case ::arm_compute::DataType::S32: + { + const auto input_value = *reinterpret_cast<int32_t *>(_input->ptr_to_element(id)); + *reinterpret_cast<int32_t *>(_output->ptr_to_element(id)) = -input_value; + break; + } + case ::arm_compute::DataType::U32: + { + const auto input_value = *reinterpret_cast<uint32_t *>(_input->ptr_to_element(id)); + *reinterpret_cast<uint32_t *>(_output->ptr_to_element(id)) = -input_value; + break; + } + default: + throw std::runtime_error("Not supported, yet"); + break; + } + }); + + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->unmap(queue); + CAST_CL(_output)->unmap(queue); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h index cb3f36337..4ca88e7f8 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h @@ -1,41 +1,39 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PAD_LAYER_H__
-#define __PAD_LAYER_H__
-
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/functions/CLFillBorder.h>
-
-class PadLayer : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width);
- void run(void) override;
-
-private:
- ::arm_compute::ICLTensor *_input;
- ::arm_compute::ICLTensor *_output;
- int _border_width;
- int _output_height;
- int _output_width;
-
- ::arm_compute::CLFillBorder _fillborderkernel;
- void populateOutput();
-};
-
-#endif // __PAD_LAYER_H__
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_NEG_H__ +#define __SIMPLE_NEG_H__ + +#include "internal/arm_compute.h" + +class SimpleNeg : public ::arm_compute::IFunction +{ +public: + SimpleNeg(void) : _input(nullptr), _output(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_NEG_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc new file mode 100644 index 000000000..2a0a25f0c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "internal/arm_compute.h" +#include "SimplePackLayer.h" + +void SimplePackLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, + ::arm_compute::ICLTensor *output, int32_t axis) +{ + uint32_t nr_inputs = input_vector.size(); + uint32_t output_rank = output->info()->num_dimensions(); + const ::arm_compute::PermutationVector pv{1, 2, 0}; + _cl_permuted_vector.resize(nr_inputs); + _cl_permute_vector.resize(nr_inputs); + + _output = output; + // A negative axis implies axis from the end. + // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1. + // Similarly, axis = -2 imples second axis from the end, i.e. axis = Rank - 2. + if (axis < 0) + { + axis += output_rank; + } + _axis = ToARMComputeAxis(output_rank, axis).value(); + _cl_reshape_vector.resize(nr_inputs); + + ::arm_compute::TensorShape subTensor_shape{}; + for (int i = 0; i < output_rank; i++) + { + if (i != _axis) + { + subTensor_shape.set(i, _output->info()->tensor_shape()[i]); + } + else + { + subTensor_shape.set(i, 1); + } + } + + auto subTensor_offset = ::arm_compute::Coordinates{}; + subTensor_offset.set_num_dimensions(output_rank); + + for (int i = 0; i < input_vector.size(); i++) + { + _input_vector.push_back(input_vector[i]); + subTensor_offset[_axis] = i; + auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( + CAST_CL(_output), subTensor_shape, subTensor_offset, true); + _sub_tensor_vector.push_back(temp_tensor); + // configure to resize of input tensor in sub tensor offseted, dimension expansion will be + // automatic + _cl_permute_vector[i].configure(CAST_CL(_input_vector[i]), &_cl_permuted_vector[i], pv); + _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], _sub_tensor_vector[i].get()); + _cl_permuted_vector[i].allocator()->allocate(); + } +} + +void SimplePackLayer::run(void) +{ + for (int i = 0; i < 
_input_vector.size(); i++) + { + _cl_permute_vector[i].run(); + _cl_reshape_vector[i].run(); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h new file mode 100644 index 000000000..2c2fc37f2 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __SIMPLE_PACK_LAYER_H__ +#define __SIMPLE_PACK_LAYER_H__ + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + +class SimplePackLayer : public ::arm_compute::IFunction +{ +public: + SimplePackLayer(void) + : _cl_permuted_vector{}, _input_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, + _cl_permute_vector{}, _output(nullptr), _axis(0) + { + // DO NOTHING + } + +public: + void configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, + ::arm_compute::ICLTensor *output, int axis); + +public: + void run(void) override; + +private: + std::vector<::arm_compute::CLTensor> _cl_permuted_vector; + std::vector<::arm_compute::ICLTensor *> _input_vector; + std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; + std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; + std::vector<::arm_compute::CLPermute> _cl_permute_vector; + ::arm_compute::ICLTensor *_output; + int _axis; +}; + +#endif // __SIMPLE_PACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc new file mode 100644 index 000000000..64236603f --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimplePadLayer.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace +{ +bool validate_arg(const ::arm_compute::ITensor *input, const ::arm_compute::ITensor *output, + const ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises) +{ + const int input_batch = input->info()->tensor_shape()[axises[0]]; + const int input_height = input->info()->tensor_shape()[axises[1]]; + const int input_width = input->info()->tensor_shape()[axises[2]]; + const int input_depth = input->info()->tensor_shape()[axises[3]]; + + const int output_batch = output->info()->tensor_shape()[axises[0]]; + const int output_height = output->info()->tensor_shape()[axises[1]]; + const int output_width = output->info()->tensor_shape()[axises[2]]; + const int output_depth = output->info()->tensor_shape()[axises[3]]; + + auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); + auto pad_batch_down = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 0})); + auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); + auto pad_height_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1})); + auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); + auto pad_width_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2})); + auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); + auto pad_depth_back = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 3})); + + const int padded_batch = input_batch + pad_batch_up + pad_batch_down; + const int padded_height = input_height + pad_height_top + pad_height_bottom; + const int padded_width = input_width + pad_width_left + pad_width_right; + const int 
padded_depth = input_depth + pad_depth_front + pad_depth_back; + + return (padded_batch == output_batch) && (padded_height == output_height) && + (padded_width == output_width) && (padded_depth == output_depth); +} +} // namespace + +void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises) +{ + + const auto rank = axises.num_dimensions(); + assert(rank == 4); + assert(input != nullptr && output != nullptr && padding_size != nullptr); + + for (int i = 0; i < rank; ++i) + { + assert(axises[i] >= 0); + assert(axises[i] < rank); + } + + _input = input; + _output = output; + _padding_size = padding_size; + _axises = axises; +} + +template <typename T> +inline void ApplyPadding(const ::arm_compute::ITensor *input_data, + const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::ITensor *padding_size, + ::arm_compute::ITensor *output_data, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises, T zero_value) +{ + + assert(validate_arg(input_data, output_data, padding_size, axises) && + "Padded Input shape does not match to output shape"); + + const int input_batch = input_shape[axises[0]]; + const int input_height = input_shape[axises[1]]; + const int input_width = input_shape[axises[2]]; + const int input_depth = input_shape[axises[3]]; + + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + // Padding size for Up, Top, Left and Front are required. 
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); + auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); + auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); + auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + if (out_b < pad_batch_up || out_b >= (input_batch + pad_batch_up) || + out_h < pad_height_top || out_h >= (input_height + pad_height_top) || + out_w < pad_width_left || out_w >= (input_width + pad_width_left) || + out_d < pad_depth_front || out_d >= (input_depth + pad_depth_front)) + { + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = zero_value; + } + else + { + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b - pad_batch_up, out_h - pad_height_top, + out_w - pad_width_left, out_d - pad_depth_front}, + axises); + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); + } + } + } + } + } +} +void SimplePadLayer::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + CAST_CL(_padding_size)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output, + _output->info()->tensor_shape(), _axises, + _input->info()->quantization_info().offset); + break; + case 
::arm_compute::DataType::F32: + ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output, + _output->info()->tensor_shape(), _axises, 0.0f); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + CAST_CL(_padding_size)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h new file mode 100644 index 000000000..8cb6659ce --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SIMPLE_PAD_LAYER_H__ +#define __SIMPLE_PAD_LAYER_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimplePadLayer : public ::arm_compute::IFunction +{ +public: + SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{} + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run(void) override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + ::arm_compute::ITensor *_padding_size; + ::arm_compute::Coordinates _axises; +}; + +#endif // __SIMPLE_PAD_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc new file mode 100644 index 000000000..b5b3a0950 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "internal/layers/SimpleSQRT.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+  _input = input;
+  _output = output;
+}
+
+void SimpleSQRT::run()
+{
+  auto &queue = ::arm_compute::CLScheduler::get().queue();
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_input)->map(queue);
+    CAST_CL(_output)->map(queue);
+  }
+
+  arm_compute::Window window;
+  window.use_tensor_dimensions(_output->info()->tensor_shape());
+
+  execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+    // NOTE SQRT is a unary element-wise operation: a single input tensor and
+    // an output tensor of the same type as the input are required.
+    assert(_input->info()->data_type() == _output->info()->data_type());
+
+    const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+    *reinterpret_cast<float *>(_output->ptr_to_element(id)) = sqrt(input_value);
+  });
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_input)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
new file mode 100644
index 000000000..b05a9e32e
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_SQRT_H__ +#define __SIMPLE_SQRT_H__ + +#include "internal/arm_compute.h" + +class SimpleSQRT : public ::arm_compute::IFunction +{ +public: + SimpleSQRT(void) : _input(nullptr), _output(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_SQRT_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc new file mode 100644 index 000000000..f53675b99 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleSpaceToBatchND.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, + ::arm_compute::ITensor *block_size, + ::arm_compute::ITensor *padding_size, + ::arm_compute::ITensor *output) +{ + const auto rank = input->info()->num_dimensions(); + assert(rank == 4); + + _input = input; + _block_size = block_size; + _padding_size = padding_size; + _output = output; +} + +template <typename T> +inline void +SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::ITensor *block_size, const ::arm_compute::ITensor *padding_size, + const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, + T zero_value) +{ + const int input_batch = input_shape[3]; + const int input_height = input_shape[1]; + const int input_width = input_shape[0]; + + const int depth = output_shape[2]; + + const int padding_height_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 1})); + const int padding_height_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 1})); + const int padding_width_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 0})); + const int padding_width_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 0})); + const int padded_height = input_height + padding_height_left + padding_height_right; + const int padded_width = input_width + padding_width_left + padding_width_right; + + const int block_size_height = *reinterpret_cast<int *>(block_size->ptr_to_element({1})); + const int block_size_width = *reinterpret_cast<int *>(block_size->ptr_to_element({0})); + + assert(padding_height_left >= 0); + assert(padding_height_right >= 0); + assert(padding_width_left >= 0); + assert(padding_width_right >= 0); + assert(block_size_height >= 1); + assert(block_size_width >= 1); + assert(padded_height % block_size_height == 0); + 
assert(padded_width % block_size_width == 0); + assert(output->info()->dimension(3) == + input->info()->dimension(3) * (block_size_height * block_size_width)); + + for (int in_b = 0; in_b < input_batch; ++in_b) + { + for (int in_d = 0; in_d < depth; ++in_d) + { + for (int in_h = 0; in_h < padded_height; ++in_h) + { + for (int in_w = 0; in_w < padded_width; ++in_w) + { + const int out_d = in_d; + const int out_h = in_h / block_size_height; + const int out_w = in_w / block_size_width; + const int out_b = + in_b + + ((in_h % block_size_height) * block_size_width + in_w % block_size_width) * + input_batch; + + const ::arm_compute::Coordinates output_id{out_w, out_h, out_d, out_b}; + + if (in_h < padding_height_left || in_h >= (input_height + padding_height_left) || + in_w < padding_width_left || in_w >= (input_width + padding_width_left)) + { + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = zero_value; + } + else + { + const ::arm_compute::Coordinates input_id{in_w - padding_width_left, + in_h - padding_height_left, in_d, in_b}; + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } + } +} +void SimpleSpaceToBatchND::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_block_size)->map(q); + CAST_CL(_padding_size)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), + _input->info()->quantization_info().offset); + break; + case ::arm_compute::DataType::F32: + SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), 0.0f); + break; + default: + 
ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_block_size)->unmap(q); + CAST_CL(_padding_size)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h new file mode 100644 index 000000000..4af961d34 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_SPACE_TO_BATCHND_H__ +#define __SIMPLE_SPACE_TO_BATCHND_H__ + +#include "internal/arm_compute.h" + +class SimpleSpaceToBatchND : public ::arm_compute::IFunction +{ +public: + SimpleSpaceToBatchND(void) + : _input(nullptr), _block_size(nullptr), _padding_size(nullptr), _output(nullptr) + { + // DO NOTHING + } + + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[in] block_size Block size. + * @param[in] padding_size Padding size. + * @param[out] output Output tensor. 
+ */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *block_size, + ::arm_compute::ITensor *padding_size, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_block_size; + ::arm_compute::ITensor *_padding_size; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_SPACE_TO_BATCHND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc index 682295f81..3519da1f3 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc @@ -19,11 +19,8 @@ #include <arm_compute/runtime/CL/CLScheduler.h> void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, - const ::arm_compute::Coordinates &axises = {3, 1, 0, 2}) + int32_t block_size, const ::arm_compute::Coordinates &axises) { - assert(input->info()->num_dimensions() == 4); - assert(output->info()->num_dimensions() == 4); const auto rank = axises.num_dimensions(); assert(rank == 4); for (int i = 0; i < rank; ++i) @@ -38,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute: _axises = axises; } -inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w, - int32_t d, const ::arm_compute::Coordinates &axises) -{ - // b, h, w, d >= 0 - size_t indexes[4]; - indexes[axises[0]] = b; - indexes[axises[1]] = h; - indexes[axises[2]] = w; - indexes[axises[3]] = d; - - int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0]; - offset += indexes[2] * shape[1] * shape[0]; - offset += indexes[1] * shape[0]; - offset += indexes[0]; - return offset; -} - template <typename T> -inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape, - int32_t block_size, T 
*output_data, +inline void SpaceToDepth(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, const ::arm_compute::Coordinates &axises) { @@ -66,16 +47,6 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape & const int input_width = input_shape[axises[2]]; const int input_depth = input_shape[axises[3]]; - const int output_batch = output_shape[axises[0]]; - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int output_depth = output_shape[axises[3]]; - - assert(input_batch == output_batch); - assert(input_height == output_height * block_size); - assert(input_width == output_width * block_size); - assert(input_depth * block_size * block_size == output_depth); - for (int in_b = 0; in_b < input_batch; ++in_b) { for (int in_h = 0; in_h < input_height; ++in_h) @@ -90,10 +61,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape & const int out_d = in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth; - const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises); - const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises); + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - output_data[output_index] = input_data[input_index]; + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); } } } @@ -110,35 +84,16 @@ void SimpleSpaceToDepth::run() CAST_CL(_output)->map(q); } - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); switch (_input->info()->data_type()) { case ::arm_compute::DataType::U8: case 
::arm_compute::DataType::QASYMM8: - SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<uint8_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::S8: - SpaceToDepth(reinterpret_cast<const int8_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<int8_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::U32: - SpaceToDepth(reinterpret_cast<const uint32_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<uint32_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::S32: - SpaceToDepth(reinterpret_cast<const int32_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<int32_t *>(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; case ::arm_compute::DataType::F32: - SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<float *>(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; default: ARM_COMPUTE_ERROR("DataType not supported"); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h index f5e028b1c..9e87c364c 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h @@ -14,25 +14,44 @@ * limitations under the License. 
 */
 
+/**
+ * @file       SimpleSpaceToDepth.h
+ * @brief      This file contains SimpleSpaceToDepth class
+ * @ingroup    COM_AI_RUNTIME
+ */
+
 #ifndef __SIMPLE_SPACE_TO_DEPTH_H__
 #define __SIMPLE_SPACE_TO_DEPTH_H__
 
 #include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "internal/arm_compute/Cast.h"
 
+/**
+ * @brief Class to run SimpleSpaceToDepth Layer
+ */
 class SimpleSpaceToDepth : public ::arm_compute::IFunction
 {
 public:
-  /** Initialise input and output
-   *
-   * @param[in]  input  First tensor input.
-   * @param[out] output Output tensor.
-   * @param[in]  block_size Block size.
+  SimpleSpaceToDepth(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+  {
+    // DO NOTHING
+  }
+
+  /**
+   * @brief Configure the layer
+   * @param[in]  input      First tensor input.
+   * @param[out] output     Output tensor.
+   * @param[in]  block_size Block size.
+   * @param[in]  axises     Axises of rank 4
+   * @return N/A
    */
   void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
-                 const ::arm_compute::Coordinates &axises);
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
 
+  /**
+   * @brief Run the operation. Must be called after configure().
+   * @return N/A
+   */
   void run() override;
 
 private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
new file mode 100644
index 000000000..abc291289
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleTransposeConv.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, + ::arm_compute::ITensor *output, + ::arm_compute::PadStrideInfo &tconv_info, + ::arm_compute::Coordinates axises) +{ + auto rank = axises.num_dimensions(); + + assert(rank == 4); + + _input = input; + _weights = weights; + _output = output; + _stride_width = tconv_info.stride().first; + _stride_height = tconv_info.stride().second; + _pad_width = tconv_info.pad_left(); + _pad_height = tconv_info.pad_top(); + _axises = axises; +} + +template <typename T> +inline void ApplyTransposeConv( + const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data, + const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data, + const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data, + const int32_t stride_width, const int32_t stride_height, const int32_t pad_width, + const int32_t pad_height, const ::arm_compute::Coordinates axises) +{ + const int batches = input_shape[axises[0]]; + const int input_height = input_shape[axises[1]]; + const int input_width = input_shape[axises[2]]; + const int input_depth = input_shape[axises[3]]; + + const int filter_height = filter_shape[axises[1]]; + const int filter_width = filter_shape[axises[2]]; + + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = 
output_shape[axises[3]]; + + assert(batches == output_shape[axises[0]]); + assert(input_depth == filter_shape[axises[3]]); + assert(filter_shape[axises[0]] == output_depth); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) + { + for (int in_y = 0; in_y < input_height; ++in_y) + { + for (int in_x = 0; in_x < input_width; ++in_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) + { + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises); + auto filter_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel}, + axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises); + T input_value = 
*reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); + T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id)); + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) += + input_value * filter_value; + } + } + } + } + } + } + } + } +} + +void SimpleTransposeConv::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_weights)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::S32: + ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + case ::arm_compute::DataType::F32: + ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_weights)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h new file mode 100644 index 000000000..c5519828b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRANSPOSE_CONV_EX__
+#define __TRANSPOSE_CONV_EX__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleTransposeConv : public ::arm_compute::IFunction
+{
+public:
+  SimpleTransposeConv()
+      : _input(nullptr), _weights(nullptr), _output(nullptr), _stride_width(0), _stride_height(0),
+        _pad_width(0), _pad_height(0)
+  {
+    // DO NOTHING
+  }
+
+  /** Initialise input and output
+   *
+   * @param[in]  input   First tensor input.
+   * @param[in]  weights Weights
+   * @param[out] output  Output tensor.
+   * @param[in]  tconv_info Contains padding and policies to be used in the deconvolution,
+   *             this is described in @ref PadStrideInfo.
+ * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, + ::arm_compute::ITensor *output, ::arm_compute::PadStrideInfo &tconv_info, + ::arm_compute::Coordinates axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_weights; + ::arm_compute::ITensor *_output; + int32_t _stride_width; + int32_t _stride_height; + int32_t _pad_width; + int32_t _pad_height; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__TRANSPOSE_CONV_EX__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc new file mode 100644 index 000000000..910595a44 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/arm_compute.h" +#include "SimpleUnpackLayer.h" + +void SimpleUnpackLayer::configure(::arm_compute::ICLTensor *input, + const std::vector<::arm_compute::ICLTensor *> &output_vector, + int32_t axis) +{ + uint32_t nr_outputs = output_vector.size(); + _cl_permuted_vector.resize(nr_outputs); + _cl_permute_vector.resize(nr_outputs); + uint32_t input_rank = input->info()->num_dimensions(); + const ::arm_compute::PermutationVector pv{2, 0, 1}; + _input = input; + // Negative axis is supported, -1 implies R-1 axis where R is input rank + if (axis < 0) + { + axis += input_rank; + } + _axis = ToARMComputeAxis(input_rank, axis).value(); + _cl_reshape_vector.resize(nr_outputs); + + ::arm_compute::TensorShape subTensor_shape{}; + for (int i = 0; i < input_rank; i++) + { + if (i != _axis) + { + subTensor_shape.set(i, _input->info()->tensor_shape()[i]); + } + else + { + subTensor_shape.set(i, 1); + } + } + + auto subTensor_offset = ::arm_compute::Coordinates{}; + subTensor_offset.set_num_dimensions(input_rank); + + for (int i = 0; i < output_vector.size(); i++) + { + _output_vector.push_back(output_vector[i]); + subTensor_offset[_axis] = i; + auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( + CAST_CL(_input), subTensor_shape, subTensor_offset, true); + _sub_tensor_vector.push_back(temp_tensor); + // Copies into the subtensor + _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv); + _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i])); + _cl_permuted_vector[i].allocator()->allocate(); + } +} + +void SimpleUnpackLayer::run(void) +{ + for (int i = 0; i < _output_vector.size(); i++) + { + _cl_permute_vector[i].run(); + _cl_reshape_vector[i].run(); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h new file mode 100644 index 000000000..52fc7513d --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __UNPACK_LAYER_H__ +#define __UNPACK_LAYER_H__ + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + +class SimpleUnpackLayer : public ::arm_compute::IFunction +{ +public: + SimpleUnpackLayer(void) + : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, + _cl_permute_vector{}, _input(nullptr), _axis(0) + { + // DO NOTHING + } + +public: + void configure(::arm_compute::ICLTensor *input, + const std::vector<::arm_compute::ICLTensor *> &output_vector, int32_t axis); + +public: + void run(void) override; + +private: + std::vector<::arm_compute::CLTensor> _cl_permuted_vector; + std::vector<::arm_compute::ICLTensor *> _output_vector; + std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; + std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; + std::vector<::arm_compute::CLPermute> _cl_permute_vector; + ::arm_compute::ICLTensor *_input; + int32_t _axis; +}; + +#endif // __UNPACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc 
b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc deleted file mode 100644 index 3f988a819..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "SquaredDifferenceOperation.h" -#include "internal/arm_compute.h" - -void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1, - ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, - ::arm_compute::ConvertPolicy ConvertPolicy, float scale, - ::arm_compute::RoundingPolicy RoundingPolicy) -{ - _input1 = input1; - _input2 = input2; - _output = output; - - if (::internal::arm_compute::isGpuMode()) - { - _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), CAST_CL(output), ConvertPolicy); - _cl_mul.configure(CAST_CL(output), CAST_CL(output), CAST_CL(output), scale, ConvertPolicy, - RoundingPolicy); - } - else - { - _neon_sub.configure(CAST_NE(input1), CAST_NE(input2), CAST_NE(output), ConvertPolicy); - _neon_mul.configure(CAST_NE(output), CAST_NE(output), CAST_NE(output), scale, ConvertPolicy, - RoundingPolicy); - } -} - -void SquaredDifferenceOperation::run(void) -{ - if (::internal::arm_compute::isGpuMode()) - { - _cl_sub.run(); - _cl_mul.run(); - } - else - { - _neon_sub.run(); - _neon_mul.run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h deleted file mode 100644 index 3782c4e8c..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __SQUARED_DIFFERENCE_OPERATION_H__ -#define __SQUARED_DIFFERENCE_OPERATION_H__ - -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/CL/CLTensor.h> - -#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h> -#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h> -#include 
<arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h> -#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h> - -class SquaredDifferenceOperation : public ::arm_compute::IFunction -{ -public: - void configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, ::arm_compute::ConvertPolicy ConvertPolicy, - float scale, ::arm_compute::RoundingPolicy RoundingPolicy); - -public: - void run(void) override; - -private: - ::arm_compute::ITensor *_input1; - ::arm_compute::ITensor *_input2; - - ::arm_compute::ITensor *_output; - -private: - ::arm_compute::CLArithmeticSubtraction _cl_sub; - ::arm_compute::CLPixelWiseMultiplication _cl_mul; - - ::arm_compute::NEArithmeticSubtraction _neon_sub; - ::arm_compute::NEPixelWiseMultiplication _neon_mul; -}; -#endif // __SQUARED_DIFFERENCE_OPERATION_H__ diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h index 764b9b13a..ac25692a1 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h @@ -14,12 +14,17 @@ * limitations under the License. 
*/ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::feature::Reader + */ #ifndef __INTERNAL_NNAPI_FEATURE_READER_H__ #define __INTERNAL_NNAPI_FEATURE_READER_H__ #include "internal/nnapi/feature/Utils.h" -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" namespace internal { @@ -28,20 +33,40 @@ namespace nnapi namespace feature { -template <typename T> class Reader final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to support reading element in feature(3D, 4D) + */ +template <typename T> class Reader final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of feature + * @param[in] ptr Pointer to feature data + * @param[in] len Size of tensor (byte) + */ // NOTE The parameter len denotes the number of bytes. - Reader(const ::nnfw::util::feature::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::feature::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of feature + * @return Shape of feature + */ + const nnfw::misc::feature::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element using channel, row, and column index for 3D feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, ch, row, col); @@ -51,6 +76,14 @@ public: return arr[index]; } + /** + * @brief Get value of element using batch, channel, row, and column index for 4D feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T 
at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, batch, ch, row, col); @@ -59,7 +92,7 @@ public: } private: - nnfw::util::feature::Shape _shape; + nnfw::misc::feature::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h index a64ff5d63..ee59d217e 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h @@ -14,10 +14,15 @@ * limitations under the License. */ +/** + * @file Utils.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines utility functions used in internal::nnapi::feature namespace + */ #ifndef __INTERNAL_NNAPI_FEATURE_UTILS_H__ #define __INTERNAL_NNAPI_FEATURE_UTILS_H__ -#include "util/feature/Shape.h" +#include "misc/feature/Shape.h" namespace internal { @@ -26,7 +31,15 @@ namespace nnapi namespace feature { -inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch, uint32_t row, +/** + * @brief Get position of element using channel, row, and column for 3D feature + * @param[in] shape Shape of feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Position of element + */ +inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t ch, uint32_t row, uint32_t col) { uint32_t res = 0; @@ -39,7 +52,16 @@ inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch, return res; } -inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t batch, uint32_t ch, +/** + * @brief Get position of element using batch, channel, row, and column for 4D feature + * @param[in] shape Shape of feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Position of element + */ +inline 
uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { uint32_t res = 0; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h index 083b6b055..965e42f1c 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h @@ -14,12 +14,17 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::feature::View class + */ #ifndef __INTERNAL_NNAPI_FEATURE_VIEW_H__ #define __INTERNAL_NNAPI_FEATURE_VIEW_H__ #include "internal/nnapi/feature/Utils.h" -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" namespace internal { @@ -28,25 +33,55 @@ namespace nnapi namespace feature { -template <typename T> class View final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to access feature's element information using index + */ +template <typename T> class View final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] shape Shape of feature + * @param[in] ptr Pointer to feature data + * @param[in] len Size of feature (byte) + * @return + */ // NOTE The parameter len denotes the number of bytes. 
- View(const ::nnfw::util::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} + View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::feature::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of feature + * @return Shape of feature + */ + const nnfw::misc::feature::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element in 3D feature using channel, row, and column index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, ch, row, col); return _ptr[index]; } + + /** + * @brief Get value of element in 4D feature using batch, channel, row and column index + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, batch, ch, row, col); @@ -54,12 +89,28 @@ public: return _ptr[index]; } + /** + * @brief Get reference of element in 3D feature using channel, row, and column index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t ch, uint32_t row, uint32_t col) { uint32_t index = index_of(_shape, ch, row, col); return _ptr[index]; } + + /** + * @brief Get reference of element in 4D feature using batch, channel, row and column index + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { uint32_t index = index_of(_shape, 
batch, ch, row, col); @@ -68,7 +119,7 @@ public: } private: - nnfw::util::feature::Shape _shape; + nnfw::misc::feature::Shape _shape; private: T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h index 0853a8c89..ae964f74c 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::kernel::Reader class + */ #ifndef __INTERNAL_NNAPI_KERNEL_READER_H__ #define __INTERNAL_NNAPI_KERNEL_READER_H__ -#include "util/kernel/Shape.h" -#include "util/kernel/Reader.h" +#include "misc/kernel/Shape.h" +#include "misc/kernel/Reader.h" namespace internal { @@ -27,20 +32,41 @@ namespace nnapi namespace kernel { -template <typename T> class Reader final : public nnfw::util::kernel::Reader<T> +/** + * @brief Class to support reading element in kernel + */ +template <typename T> class Reader final : public nnfw::misc::kernel::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of kernel + * @param[in] ptr Pointer to kernel data + * @param[in] len Size of kernel (byte) + */ // NOTE The parameter len denotes the number of bytes. 
- Reader(const ::nnfw::util::kernel::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::kernel::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::kernel::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of kernel + * @return Shape of kernel + */ + const nnfw::misc::kernel::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element for kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override { // NNAPI uses NHWC ordering @@ -55,7 +81,7 @@ public: } private: - nnfw::util::kernel::Shape _shape; + nnfw::misc::kernel::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h index f6f0f3908..f03a4be31 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h @@ -14,11 +14,16 @@ * limitations under the License. 
*/ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::matrix::Reader class + */ #ifndef __INTERNAL_NNAPI_MATRIX_READER_H__ #define __INTERNAL_NNAPI_MATRIX_READER_H__ -#include "util/matrix/Shape.h" -#include "util/matrix/Reader.h" +#include "misc/matrix/Shape.h" +#include "misc/matrix/Reader.h" namespace internal { @@ -27,20 +32,39 @@ namespace nnapi namespace matrix { -template <typename T> class Reader final : public nnfw::util::matrix::Reader<T> +/** + * @brief Class to support reading element in matrix + */ +template <typename T> class Reader final : public nnfw::misc::matrix::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of matrix + * @param[in] ptr Pointer to matrix data + * @param[in] len Size of matrix (byte) + */ // NOTE The parameter len denotes the number of bytes. - Reader(const ::nnfw::util::matrix::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::matrix::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::matrix::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of matrix + * @return Shape of matrix + */ + const nnfw::misc::matrix::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element for matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t row, uint32_t col) const override { // NNAPI uses NHWC ordering @@ -53,7 +77,7 @@ public: } private: - nnfw::util::matrix::Shape _shape; + nnfw::misc::matrix::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h index 38d1b291b..6a3fff646 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h +++ 
b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       ConstView.h + * @brief      This file contains ConstView class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__ #define __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__ @@ -27,21 +33,36 @@ namespace nnapi namespace tensor { +/** + * @brief Wrapper class to read tensor values + * @tparam T The tensor element type + */ template <typename T> class ConstView { public: - ConstView(const ::nnfw::util::tensor::Shape &shape, const uint8_t *ptr, size_t len) + /** + * @brief Construct a ConstView class + * @param[in] shape Tensor shape + * @param[in] ptr The base pointer of actual data + * @param[in] len The number of bytes + */ + ConstView(const ::nnfw::misc::tensor::Shape &shape, const uint8_t *ptr, size_t len) : _shape{shape}, _ptr{ptr}, _len{len} { // DO NOTHING } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } private: // TODO Make this as a helper function, and share it for both View<T> and ConstView<T> - uint32_t offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Calculate offset for the given tensor index + * @param[in] index Tensor index + * @return The calculated offset + */ + uint32_t offset_of(const nnfw::misc::tensor::Index &index) const { if (_shape.rank() == 0) { @@ -61,7 +82,12 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get the value on the given index + * @param[in] index Flattened tensor index + * @return The value on the given index + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = offset_of(index); @@ -71,7 +97,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: const uint8_t *const _ptr; diff --git 
a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h index fe89e572e..cc51db594 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h @@ -14,11 +14,17 @@ * limitations under the License. */ +/** + * @file       Reader.h + * @brief      This file contains Reader class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_NNAPI_TENSOR_READER_H__ #define __INTERNAL_NNAPI_TENSOR_READER_H__ #include <vector> -#include "util/tensor/Reader.h" +#include "misc/tensor/Reader.h" namespace internal { @@ -27,11 +33,20 @@ namespace nnapi namespace tensor { -template <typename T> class Reader final : public nnfw::util::tensor::Reader<T> +/** + * @brief Wrapper class to read tensor values + * @tparam T The tensor element type + */ +template <typename T> class Reader final : public nnfw::misc::tensor::Reader<T> { public: - // NOTE The parameter len denotes the number of bytes. 
- Reader(const ::nnfw::util::tensor::Shape &shape, const T *ptr, size_t len) + /** + * @brief Construct a Reader class + * @param[in] shape Tensor shape + * @param[in] ptr The base pointer of actual data + * @param[in] len The number of bytes + */ + Reader(const ::nnfw::misc::tensor::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.element_nums() * sizeof(T) == len); @@ -39,10 +54,19 @@ public: } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape object + * @return The shape as const reference + */ + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } public: - T at(const nnfw::util::tensor::Index &index_nnapi) const override + /** + * @brief Get the value on the given index + * @param[in] index_nnapi Flattened tensor index + * @return The value on the given index + */ + T at(const nnfw::misc::tensor::Index &index_nnapi) const override { uint32_t offset = 0; @@ -53,17 +77,19 @@ public: } private: - /* - Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr, - we need to calculate the offset of index [i4, .. i1] as follows: - offset = i4 * (d3 * d2 * d1) + - i3 * (d2 * d1) + - i2 * (d1) + - i1 - So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) happens whenever offset is calculate. - To minimize this repetitive calculation, - _stridess[n] contains _spape[n-1]*_spape[n-2]*_spape[0] - */ + /** + * @brief Initializes @c _stridess + * @return N/A + * @note Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr, + we need to calculate the offset of index [i4, .. i1] as follows: + offset = i4 * (d3 * d2 * d1) + + i3 * (d2 * d1) + + i2 * (d1) + + i1 + So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) happens whenever offset is + calculate. 
To minimize this repetitive calculation, + _stridess[n] contains _shape[n-1]*_shape[n-2]*_shape[0] + */ void initialize(void) { for (int r = 0; r < _shape.rank(); r++) @@ -76,7 +102,7 @@ private: - nnfw::util::tensor::Shape _shape; + nnfw::misc::tensor::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h index 80e1bb057..f8f297f97 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::tensor::View class + */ #ifndef __INTERNAL_NNAPI_TENSOR_VIEW_H__ #define __INTERNAL_NNAPI_TENSOR_VIEW_H__ -#include "util/tensor/Shape.h" -#include "util/tensor/Index.h" +#include "misc/tensor/Shape.h" +#include "misc/tensor/Index.h" namespace internal { @@ -27,20 +32,38 @@ namespace tensor { +/** + * @brief Class to access tensor's element information using index + */ template <typename T> class View { public: + /** + * @brief Construct a new View object + * @param[in] shape Shape of tensor + * @param[in] ptr Pointer to tensor data + * @param[in] len Size of tensor (byte) + */ // NOTE The parameter len denotes the number of bytes. 
- View(const ::nnfw::util::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} + View(const ::nnfw::misc::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.element_nums() * sizeof(T) == len); } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of tensor + * @return Shape of tensor + */ + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } private: - uint32_t offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Get position of element using index in tensor + * @param[in] index Index of element + * @return Position of element + */ + uint32_t offset_of(const nnfw::misc::tensor::Index &index) const { if (_shape.rank() == 0) { @@ -60,14 +83,24 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get value of element at index + * @param[in] index Index of element + * @return Value of element at index + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = offset_of(index); return _ptr[offset]; } - T &at(const nnfw::util::tensor::Index &index) + /** + * @brief Get reference of element at index + * @param[in] index Index of element + * @return Reference of element at index + */ + T &at(const nnfw::misc::tensor::Index &index) { const auto offset = offset_of(index); @@ -75,7 +108,7 @@ public: } private: - nnfw::util::tensor::Shape _shape; + nnfw::misc::tensor::Shape _shape; private: T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.cc b/runtimes/pure_arm_compute/src/internal/op/Abs.cc new file mode 100644 index 000000000..e23a9538c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Abs.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Abs.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + input_index = inputs[0]; +} + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.h b/runtimes/pure_arm_compute/src/internal/op/Abs.h new file mode 100644 index 000000000..0be8b0205 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Abs.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_ABS_H__ +#define __INTERNAL_OP_ABS_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +struct Param +{ + int32_t output_index; + int32_t input_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_ABS_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Add.h b/runtimes/pure_arm_compute/src/internal/op/Add.h index 42ed5b976..a7804a569 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Add.h +++ b/runtimes/pure_arm_compute/src/internal/op/Add.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Add.h + * @brief This file contains accept function and params for Add operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_ADD_H__ #define __INTERNAL_OP_ADD_H__ @@ -30,33 +36,66 @@ namespace op namespace Add { +/** + * @brief Struct of Add operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t lhs_index; /**< Left hand side index */ + int32_t rhs_index; /**< Right hand side index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Add as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Add with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Add + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Add with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Add + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Add + * @return Parameters of Add + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Add + * @param [in] v Node visitor for invoking visit function of Add + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc new file mode 100644 index 000000000..485430377 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc @@ -0,0 
+1,64 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "internal/op/ArgMax.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.h b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h new file mode 100644 index 000000000..780af2232 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_ARGMAX_H__ +#define __INTERNAL_OP_ARGMAX_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_ARGMAX_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h index 729f6043c..cf9061ca9 100644 --- a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file AvgPool2D.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::AvgPool2D Param structs + * and internal::tflite::op::AvgPool2D Node classes + */ #ifndef __INTERNAL_OP_AVG_POOL_2D_H__ #define __INTERNAL_OP_AVG_POOL_2D_H__ @@ -32,44 +38,75 @@ namespace AvgPool2D namespace Explicit { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t ifm_index; + int32_t ifm_index; /**< Index of input feature map */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Index of kernel width */ + int32_t kh_index; /**< Index of kernel height */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; - - int32_t activation_index; + int32_t padding_left_index; /**< Index of padding left */ + int32_t padding_right_index; /**< Index of padding right */ + int32_t padding_top_index; /**< Index of padding top */ + int32_t padding_bottom_index; /**< Index of padding bottom */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO 
NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -81,40 +118,71 @@ private: namespace Implicit { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t kw_index; - int32_t kh_index; + int32_t ifm_index; /**< Index of input feature map */ - int32_t hstride_index; - int32_t vstride_index; + int32_t kw_index; /**< Index of kernel width */ + int32_t kh_index; /**< Index of kernel height */ - int32_t padding_index; - int32_t activation_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ + int32_t padding_index; /**< Index of padding */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param 
object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc new file mode 100644 index 000000000..0768039d0 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc @@ -0,0 +1,63 @@ +/*Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/op/BatchToSpaceNd.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + input_index = inputs[0]; + block_size_index = inputs[1]; +} + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h new file mode 100644 index 000000000..a514cb44c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_BATCHTOSPACE_ND_H__ +#define __INTERNAL_OP_BATCHTOSPACE_ND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_BATCHTOSPACE_Nd_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Cast.h b/runtimes/pure_arm_compute/src/internal/op/Cast.h index 3b3795189..8af741a16 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Cast.h +++ b/runtimes/pure_arm_compute/src/internal/op/Cast.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Cast.h + * @brief This file contains accept function and params for Cast operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_CAST_H__ #define __INTERNAL_OP_CAST_H__ @@ -30,31 +36,64 @@ namespace op namespace Cast { +/** + * @brief Struct of Cast operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; + int32_t input_index; /**< Input index */ + /** + * @brief Construct a new Param object for Cast as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Cast with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Cast + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Cast with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Cast + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Cast + * @return Parameters of Cast + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Cast + * @param [in] v Node visitor for invoking visit function of Cast + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Concat.h b/runtimes/pure_arm_compute/src/internal/op/Concat.h index 185cba3e1..207f964fb 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Concat.h +++ b/runtimes/pure_arm_compute/src/internal/op/Concat.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Concat.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Concat node + */ + #ifndef __INTERNAL_OP_CONCAT_H__ #define __INTERNAL_OP_CONCAT_H__ @@ -31,36 +37,68 @@ namespace op namespace Concat { +/** + * @brief Struct to manipulate parameter for Concat operation + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output - std::vector<int32_t> ifm_indexes; - int32_t axis_index; + std::vector<int32_t> ifm_indexes; //!< index for input + int32_t axis_index; //!< index for axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Concat Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Concat Node object + * @param param Parameter for Concat Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Concat node }; } // namespace Concat diff --git a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h index b04b8c85f..de46fbb9c 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Conv2D.h + * @brief This file contains accept function and params for Conv2D operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_CONV_2D_H__ #define __INTERNAL_OP_CONV_2D_H__ @@ -32,43 +38,76 @@ namespace Conv2D namespace Explicit { +/** + * @brief Struct of Conv2D(explicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Input format index */ + int32_t ker_index; /**< Kernel index */ + int32_t bias_index; /**< Bias index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; + int32_t padding_left_index; /**< Left padding index */ + int32_t padding_right_index; /**< Right padding index */ + int32_t padding_top_index; /**< Top padding index */ + int32_t padding_bottom_index; /**< Bottomd padding index */ - int32_t activation_index; + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Conv2D(explicit) as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Conv2D(explicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Conv2D(explicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for conv2D(explicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + 
/** + * @brief Destroy the Node object for conv2D(explicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for conv2D(explicit) + * @return Parameters of conv2D(explicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for conv2D(explicit) + * @param [in] v Node visitor for invoking visit function of conv2D(explicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -80,39 +119,72 @@ private: namespace Implicit { +/** + * @brief Struct of Conv2D(implicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Input format index */ + int32_t ker_index; /**< Kernel index */ + int32_t bias_index; /**< Bias index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_index; - int32_t activation_index; + int32_t padding_index; /**< Padding index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Conv2D(implicit) as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Conv2D(implicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Conv2D(implicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for conv2D(implicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy 
the Node object for conv2D(implicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for conv2D(implicit) + * @return Parameters of conv2D(implicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for conv2D(implicit) + * @param [in] v Node visitor for invoking visit function of conv2D(implicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc new file mode 100644 index 000000000..db164a148 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/DepthToSpace.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + input_index = inputs[0]; + block_size_index = inputs[1]; +} + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h new file mode 100644 index 000000000..dd4c5c914 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_DEPTHTOSPACE_H__ +#define __INTERNAL_OP_DEPTHTOSPACE_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_DEPTHTOSPACE_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h index 77ab4b63e..c63e30aae 100644 --- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file DepthwiseConv2D.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::DepthwiseConv2D Param structs + * and internal::tflite::op::DepthwiseConv2D Node classes + */ #ifndef __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ #define __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ @@ -32,44 +38,75 @@ namespace DepthwiseConv2D namespace Explicit { +/** + * @brief Struct to have indexes for explicit padding DepthwiseConv2D operation parameter + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Index of input feature map */ + int32_t ker_index; /**< Index of kernel */ + int32_t bias_index; /**< Index of bias */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; - - int32_t multipler_index; - int32_t activation_index; + int32_t padding_left_index; /**< Index of padding left */ + int32_t padding_right_index; /**< Index of padding right */ + int32_t padding_top_index; /**< Index of padding top */ + int32_t padding_bottom_index; /**< Index of padding bottom */ + int32_t multipler_index; /**< Index of multipler */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an explicit padding DepthwiseConv2D operation of data structure + */ class Node final : public 
op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -78,43 +115,74 @@ private: } // namespace Explicit +/** + * @brief Struct to have indexes for implicit padding DepthwiseConv2D operation parameter + */ namespace Implicit { struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t hstride_index; - int32_t vstride_index; + int32_t ifm_index; /**< Index of input feature map */ + int32_t ker_index; /**< Index of kernel */ + int32_t bias_index; /**< Index of bias */ - int32_t padding_index; - int32_t multipler_index; - int32_t activation_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ + int32_t padding_index; /**< Index of padding */ + int32_t multipler_index; /**< Index of multipler */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an implicit padding DepthwiseConv2D operation of data structure + */ class Node final : public op::Node { public: 
+ /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h index b0645d136..f19898e9e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h +++ b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Dequantize.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Dequantize::Param struct + * and internal::tflite::op::Dequantize::Node class + */ #ifndef __INTERNAL_OP_DEQUANTIZE_H__ #define __INTERNAL_OP_DEQUANTIZE_H__ @@ -30,31 +36,62 @@ namespace op namespace Dequantize { +/** + * @brief Struct to have indexes for Dequantize operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an Dequantize operation of data structure + */ class Node final : public op::Node { public: + /** + * 
@brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Div.h b/runtimes/pure_arm_compute/src/internal/op/Div.h index 06ed7ec21..d5fc09d19 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Div.h +++ b/runtimes/pure_arm_compute/src/internal/op/Div.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Div.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Div::Param struct + * and internal::tflite::op::Div::Node class + */ #ifndef __INTERNAL_OP_DIV_H__ #define __INTERNAL_OP_DIV_H__ @@ -30,33 +36,64 @@ namespace op namespace Div { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t lhs_index; /**< Index of lhs */ + int32_t rhs_index; /**< Index of rhs */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node 
{ public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h index 4547f27c7..17e8485f7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h +++ b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file EmbeddingLookup.h + * @brief This file contains accept function and params for EmbeddingLookup operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_EMBEDDING_LOOKUP_H__ #define __INTERNAL_OP_EMBEDDING_LOOKUP_H__ @@ -30,32 +36,65 @@ namespace op namespace EmbeddingLookup { +/** + * @brief Struct of EmbeddingLookup operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t lookups_index; - int32_t values_index; + int32_t lookups_index; /**< Lookups index */ + int32_t values_index; /**< Values index */ + /** + * @brief Construct a new Param object for EmbeddingLookup as default + */ Param() = default; + + /** + * @brief Construct a new Param object for EmbeddingLookup with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * 
@brief Class to define operation node for EmbeddingLookup + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for EmbeddingLookup with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for EmbeddingLookup + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for EmbeddingLookup + * @return Parameters of EmbeddingLookup + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for EmbeddingLookup + * @param [in] v Node visitor for invoking visit function of EmbeddingLookup + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.cc b/runtimes/pure_arm_compute/src/internal/op/Equal.cc new file mode 100644 index 000000000..b9cccc6a9 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Equal.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/Equal.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.h b/runtimes/pure_arm_compute/src/internal/op/Equal.h new file mode 100644 index 000000000..78b9f846f --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Equal.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_EQUAL_H__ +#define __INTERNAL_OP_EQUAL_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_EQUAL_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.cc b/runtimes/pure_arm_compute/src/internal/op/Exp.cc new file mode 100644 index 000000000..6f1aa8f42 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Exp.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/Exp.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + ifm_index = inputs[0]; +} + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.h b/runtimes/pure_arm_compute/src/internal/op/Exp.h new file mode 100644 index 000000000..ac7f244b7 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Exp.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_EXP_H__ +#define __INTERNAL_OP_EXP_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_EXP_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Floor.h b/runtimes/pure_arm_compute/src/internal/op/Floor.h index 8cf2a841c..5264ec10c 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Floor.h +++ b/runtimes/pure_arm_compute/src/internal/op/Floor.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Floor.h + * @brief This file contains accept function and params for Floor operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_FLOOR_H__ #define __INTERNAL_OP_FLOOR_H__ @@ -30,31 +36,64 @@ namespace op namespace Floor { +/** + * @brief Struct of Floor operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; + int32_t input_index; /**< Input index */ + /** + * @brief Construct a new Param object for Floor as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Floor with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Floor + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Floor with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Floor + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Floor + * @return Parameters of Floor + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Floor + * @param [in] v Node visitor for invoking visit function of Floor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h index 7a425a6af..434308435 100644 --- a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h +++ b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file FullyConnected.h + * @brief This file contains accept function and params for FullyConnected operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_FULLY_CONNTECTED_H__ #define __INTERNAL_OP_FULLY_CONNTECTED_H__ @@ -30,34 +36,70 @@ namespace op namespace FullyConnected { +/** + * @brief Struct of FullyConnected operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t weight_index; - int32_t bias_index; - int32_t activation_index; + int32_t input_index; /**< Input index */ + int32_t weight_index; /**< Weight index */ + int32_t bias_index; /**< Bias index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for FullyConnected as default + */ Param() = default; + + /** + * @brief Construct a new Param object for FullyConnected with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for FullyConnected + */ class Node final : public op::Node { + /** + * @brief Construct a new Node object for FullyConnected with param + * @param [in] param Parameters for Node + */ public: + /** + * @brief Destroy the Node object for FullyConnected + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for FullyConnected + */ virtual ~Node() = default; public: + /** + * @brief Parameter Get parameters for FullyConnected + * @return _param Parameters of FullyConnected + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for FullyConnected + * @param [in] v Node visitor for invoking visit function of FullyConnected + * @return N/A + 
*/ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Gather.h b/runtimes/pure_arm_compute/src/internal/op/Gather.h index 5f7fe956f..4470236eb 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Gather.h +++ b/runtimes/pure_arm_compute/src/internal/op/Gather.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Gather.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Gather operation + */ + #ifndef __INTERNAL_OP_GATHER_H__ #define __INTERNAL_OP_GATHER_H__ @@ -30,37 +36,69 @@ namespace op namespace Gather { +/** + * @brief Struct to manipulate parameter for Gather operation + */ struct Param { - int32_t ofm_index; // output + int32_t ofm_index; //!< index for output feature map - int32_t lhs_index; // input - int32_t rhs_index; // indexes - int32_t axis_index; // axis + int32_t lhs_index; //!< index for lhs tensor + int32_t rhs_index; //!< index for rhs tensor + int32_t axis_index; //!< index for axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Gather Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Tanh Node object + * @param param Parameter for Tanh Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) 
const override; private: - const Param _param; + const Param _param; //!< parameter for Gather node }; } // namespace Gather diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc index 30a853a64..7e04ecf82 100644 --- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc +++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc @@ -1,52 +1,68 @@ -#include "internal/op/HashtableLookup.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 3 && outputCount == 2);
-
- output_index = outputs[0];
- hits_index = outputs[1];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lookups Index
- // 1 -> Keys Index
- // 2 -> Values Index
- lookups_index = inputs[0];
- keys_index = inputs[1];
- values_index = inputs[2];
-}
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/HashtableLookup.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3 && outputCount == 2); + + output_index = outputs[0]; + hits_index = outputs[1]; + + // Each input should be interpreted as follows: + // + // 0 -> Lookups Index + // 1 -> Keys Index + // 2 -> Values Index + lookups_index = inputs[0]; + keys_index = inputs[1]; + values_index = inputs[2]; +} + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h index 192da2aae..a5b43d1c7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h +++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h @@ -1,56 +1,109 @@ -#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-struct Param
-{
- int32_t output_index;
- int32_t hits_index;
-
- int32_t lookups_index;
- int32_t values_index;
- int32_t keys_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param ¶m) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param ¶m(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file HashtableLookup.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::HashtableLookup::Param struct + * and internal::tflite::op::HashtableLookup::Node class + */ +#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__ +#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t output_index; /**< Index of output feature map */ + int32_t hits_index; /**< Index of hits */ + + int32_t lookups_index; /**< Index of lookups */ + int32_t values_index; /**< Index of values */ + int32_t keys_index; /**< Index of keys */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with 
param + * @param[in] param Param object that makes up a Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc index 449540178..44a6ee63d 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc +++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + #include "internal/op/L2Normalization.h" #include "internal/op/NodeVisitor.h" diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h index 70fa2fb7e..2e94fac11 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h +++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file L2Normalization.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::L2Normalization::Param struct + * and internal::tflite::op::L2Normalization::Node class + */ #ifndef __INTERNAL_OP_L2_NORMALIZATION_H__ #define __INTERNAL_OP_L2_NORMALIZATION_H__ @@ -14,31 +36,62 @@ namespace op namespace L2Normalization { +/** + * @brief Struct to have indexes for L2Normalization operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an L2Normalization operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc index 73c1bb65c..64041ab49 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc +++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc @@ -1,124 +1,124 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/L2Pool2D.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 10 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_left_index = inputs[1];
- padding_right_index = inputs[2];
- padding_top_index = inputs[3];
- padding_bottom_index = inputs[4];
- hstride_index = inputs[5];
- vstride_index = inputs[6];
- kw_index = inputs[7];
- kh_index = inputs[8];
- activation_index = inputs[9];
-}
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 7 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_index = inputs[1];
- hstride_index = inputs[2];
- vstride_index = inputs[3];
- kw_index = inputs[4];
- kh_index = inputs[5];
- activation_index = inputs[6];
-}
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/L2Pool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 10 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Padding_left index
+  // 2 -> Padding_right index
+  // 3 -> Padding_top index
+  // 4 -> Padding_bottom index
+  // 5 -> Horizontal (over width) Stride Index
+  // 6 -> Vertial (over height) Stride Index
+  // 7 -> Filter Width Index
+  // 8 -> Filter Height Index
+  // 9 -> FuseCode (activation) Index
+  ifm_index = inputs[0];
+  padding_left_index = inputs[1];
+  padding_right_index = inputs[2];
+  padding_top_index = inputs[3];
+  padding_bottom_index = inputs[4];
+  hstride_index = inputs[5];
+  vstride_index = inputs[6];
+  kw_index = inputs[7];
+  kh_index = inputs[8];
+  activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 7 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+  // 2 -> Horizontal (over width) Stride Index
+  // 3 -> Vertial (over height) Stride Index
+  // 4 -> Filter Width Index
+  // 5 -> Filter Height Index
+  // 6 -> FuseCode (activation) Index
+  ifm_index = inputs[0];
+  padding_index = inputs[1];
+  hstride_index = inputs[2];
+  vstride_index = inputs[3];
+  kw_index = inputs[4];
+  kh_index = inputs[5];
+  activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
index f4a25539b..facb223c7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
@@ -1,130 +1,198 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_L2_POOL_2D_H__
-#define __INTERNAL_OP_L2_POOL_2D_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
-
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_index;
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_L2_POOL_2D_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Pool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Pool2D Param structs
+ * and internal::tflite::op::L2Pool2D Node classes
+ */
+#ifndef __INTERNAL_OP_L2_POOL_2D_H__
+#define __INTERNAL_OP_L2_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+  int32_t kw_index; /**< Index of kernel width */
+  int32_t kh_index; /**< Index of kernel height */
+
+  int32_t hstride_index; /**< Index of horizontal stride */
+  int32_t vstride_index; /**< Index of vertical stride */
+
+  int32_t padding_left_index; /**< Index of padding left */
+  int32_t padding_right_index; /**< Index of padding right */
+  int32_t padding_top_index; /**< Index of padding top */
+  int32_t padding_bottom_index; /**< Index of padding bottom */
+
+  int32_t activation_index; /**< Index of activation */
+  /**
+   * @brief Construct as default
+   */
+  Param() = default;
+  /**
+   * @brief Construct a new Param object with params
+   * @param[in] inputCount Count of inputs
+   * @param[in] inputs Pointer of inputs
+   * @param[in] outputCount Count of outputs
+   * @param[in] outputs Pointer of outputs
+   */
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+  /**
+   * @brief Construct a new Node object with param
+   * @param[in] param Param object that makes up a Node
+   */
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Destruct as default
+   */
+  virtual ~Node() = default;
+
+public:
+  /**
+   * @brief Get a reference of Param object
+   * @return Reference of Param object
+   */
+  const Param &param(void) const { return _param; }
+
+public:
+  /**
+   * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+   */
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+  int32_t kw_index; /**< Index of kernel width */
+  int32_t kh_index; /**< Index of kernel height */
+
+  int32_t hstride_index; /**< Index of horizontal stride */
+  int32_t vstride_index; /**< Index of vertical stride */
+
+  int32_t padding_index; /**< Index of padding */
+  int32_t activation_index; /**< Index of activation */
+  /**
+   * @brief Construct as default
+   */
+  Param() = default;
+  /**
+   * @brief Construct a new Param object with params
+   * @param[in] inputCount Count of inputs
+   * @param[in] inputs Pointer of inputs
+   * @param[in] outputCount Count of outputs
+   * @param[in] outputs Pointer of outputs
+   */
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+  /**
+   * @brief Construct a new Node object with param
+   * @param[in] param Param object that makes up a Node
+   */
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Destruct as default
+   */
+  virtual ~Node() = default;
+
+public:
+  /**
+   * @brief Get a reference of Param object
+   * @return Reference of Param object
+   */
+  const Param &param(void) const { return _param; }
+
+public:
+  /**
+   * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+   */
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_L2_POOL_2D_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
new file mode 100644
index 000000000..b7419d923
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "internal/op/LocalResponseNormalization.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 5 && outputCount == 1); + + ofm_index = outputs[0]; + + ifm_index = inputs[0]; + radius_index = inputs[1]; + bias_index = inputs[2]; + alpha_index = inputs[3]; + beta_index = inputs[4]; +} + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h new file mode 100644 index 000000000..29e0699ad --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ +#define __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t radius_index; + int32_t bias_index; + int32_t alpha_index; + int32_t beta_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc new file mode 100644 index 000000000..5b7da4d3b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalAnd.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h new file mode 100644 index 000000000..2f53f756d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_AND_H__ +#define __INTERNAL_OP_LOGICAL_AND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_AND_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc new file mode 100644 index 000000000..4cb6a8e2a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalNot.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + + input_index = inputs[0]; +} + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h new file mode 100644 index 000000000..9593deafe --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_NOT_H__ +#define __INTERNAL_OP_LOGICAL_NOT_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_NOT_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc new file mode 100644 index 000000000..8295f6f0b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalOr.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h new file mode 100644 index 000000000..6487fa720 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_OR_H__ +#define __INTERNAL_OP_LOGICAL_OR_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_OR_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Logistic.h b/runtimes/pure_arm_compute/src/internal/op/Logistic.h index db8935846..a42fdc0d4 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Logistic.h +++ b/runtimes/pure_arm_compute/src/internal/op/Logistic.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Logistic.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Logistic::Param struct + * and internal::tflite::op::Logistic::Node class + */ #ifndef __INTERNAL_OP_LOGISTIC_H__ #define __INTERNAL_OP_LOGISTIC_H__ @@ -30,31 +36,61 @@ namespace op namespace Logistic { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Lstm.h b/runtimes/pure_arm_compute/src/internal/op/Lstm.h index 056ac2ea7..f51f0402a 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Lstm.h +++ b/runtimes/pure_arm_compute/src/internal/op/Lstm.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Lstm.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::LSTM::Param struct + * and internal::tflite::op::LSTM::Node class + */ #ifndef __INTERNAL_OP_LSTM_H__ #define __INTERNAL_OP_LSTM_H__ @@ -30,56 +36,87 @@ namespace op namespace LSTM { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t scratch_buffer_index; - int32_t output_state_out_index; - int32_t cell_state_out_index; - int32_t output_index; + int32_t scratch_buffer_index; /**< Index of scartch buffer */ + int32_t output_state_out_index; /**< Index of output state out */ + int32_t cell_state_out_index; /**< Index of cell state out */ + int32_t output_index; /**< Index of output */ - int32_t input_index; - int32_t input_to_input_weights_index; - int32_t input_to_forget_weights_index; - int32_t input_to_cell_weights_index; - int32_t input_to_output_weights_index; - int32_t recurrent_to_input_weights_index; - int32_t recurrent_to_forget_weights_index; - int32_t recurrent_to_cell_weights_index; - int32_t recurrent_to_output_weights_index; - int32_t cell_to_input_weights_index; - int32_t cell_to_forget_weights_index; - int32_t cell_to_output_weights_index; - int32_t input_gate_bias_index; - int32_t forget_gate_bias_index; - int32_t cell_bias_index; - int32_t output_gate_bias_index; - int32_t projection_weights_index; - int32_t projection_bias_index; - int32_t output_state_in_index; - int32_t cell_state_in_index; - int32_t activation_index; - int32_t cell_threshold_index; - int32_t projection_threshold_index; + int32_t input_index; /**< Index of input */ + int32_t input_to_input_weights_index; /**< Index of input to input weights */ + int32_t input_to_forget_weights_index; /**< Index of input to forget weights */ + int32_t input_to_cell_weights_index; /**< Index of input to cell weights */ + int32_t input_to_output_weights_index; /**< Index of input to output weights */ + int32_t recurrent_to_input_weights_index; /**< Index 
of recurrent to input weights */ + int32_t recurrent_to_forget_weights_index; /**< Index of recurrent to forget weights */ + int32_t recurrent_to_cell_weights_index; /**< Index of recurrent to cell weights */ + int32_t recurrent_to_output_weights_index; /**< Index of recurrent to output weights */ + int32_t cell_to_input_weights_index; /**< Index of cell to input weights */ + int32_t cell_to_forget_weights_index; /**< Index of cell to forget weights */ + int32_t cell_to_output_weights_index; /**< Index of cell to output weights */ + int32_t input_gate_bias_index; /**< Index of input gate bias */ + int32_t forget_gate_bias_index; /**< Index of forget gate bias */ + int32_t cell_bias_index; /**< Index of cell bias */ + int32_t output_gate_bias_index; /**< Index of output gate bias */ + int32_t projection_weights_index; /**< Index of projection weights */ + int32_t projection_bias_index; /**< Index of projection bias */ + int32_t output_state_in_index; /**< Index of output state in */ + int32_t cell_state_in_index; /**< Index of cell state in */ + int32_t activation_index; /**< Index of activation */ + int32_t cell_threshold_index; /**< Index of cell threshold */ + int32_t projection_threshold_index; /**< Index of projection threshold */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual 
~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h index d5da17d66..329ccecb7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file MaxPool2D.h + * @brief This file contains accept function and params for MaxPool2D operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_MAX_POOL_2D_H__ #define __INTERNAL_OP_MAX_POOL_2D_H__ @@ -32,44 +38,77 @@ namespace MaxPool2D namespace Explicit { +/** + * @brief Struct of MaxPool2D(Explicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Kernel width index */ + int32_t kh_index; /**< Kernel height index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; + int32_t padding_left_index; /**< Left padding index */ + int32_t padding_right_index; /**< Right padding index */ + int32_t padding_top_index; /**< Top padding index */ + int32_t padding_bottom_index; /**< Bottom padding index */ - int32_t activation_index; + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for MaxPool2D(Explicit) as default + */ Param() = default; + + /** + * @brief Construct a 
new Param object for MaxPool2D(Explicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for MaxPool2D(Explicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for MaxPool2D(Explicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for MaxPool2D(Explicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for MaxPool2D(Explicit) + * @return Parameters of MaxPool2D(Explicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for MaxPool2D(Explicit) + * @param [in] v Node visitor for invoking visit function of MaxPool2D(Explicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -81,40 +120,73 @@ private: namespace Implicit { +/** + * @brief Struct of MaxPool2D(Implicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Kernel width index */ + int32_t kh_index; /**< Kernel height index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_index; - int32_t activation_index; + int32_t padding_index; /**< Padding index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for MaxPool2D(Implicit) as default + */ Param() = default; + + /** + * 
@brief Construct a new Param object for MaxPool2D(Implicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for MaxPool2D(Implicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for MaxPool2D(Implicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for MaxPool2D(Implicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for MaxPool2D(Implicit) + * @return Parameters of MaxPool2D(Implicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for MaxPool2D(Implicit) + * @param [in] v Node visitor for invoking visit function of MaxPool2D(Implicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Mean.h b/runtimes/pure_arm_compute/src/internal/op/Mean.h index 385b38dbf..f8e7ed308 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Mean.h +++ b/runtimes/pure_arm_compute/src/internal/op/Mean.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Mean.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Mean::Param struct + * and internal::tflite::op::Mean::Node class + */ #ifndef __INTERNAL_OP_MEAN_H__ #define __INTERNAL_OP_MEAN_H__ @@ -30,33 +36,64 @@ namespace op namespace Mean { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; // output - - int32_t ifm_index; // input - int32_t axis_index; // axis - int32_t keep_dims_index; // keep_dims + int32_t ofm_index; /**< Index of output feature map */ // output + int32_t ifm_index; /**< Index of input feature map */ // input + int32_t axis_index; /**< Index of axis */ // axis + int32_t keep_dims_index; /**< Index of keep dims */ // keep_dims + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Mul.h b/runtimes/pure_arm_compute/src/internal/op/Mul.h index ebb72c4be..9710dd057 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Mul.h +++ b/runtimes/pure_arm_compute/src/internal/op/Mul.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Mul.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Mul class + */ #ifndef __INTERNAL_OP_MUL_H__ #define __INTERNAL_OP_MUL_H__ @@ -30,33 +35,63 @@ namespace op namespace Mul { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /** Index of output feature map */ + int32_t lhs_index; /** Index of lhs */ + int32_t rhs_index; /** Index of rhs */ + int32_t activation_index; /** Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.cc b/runtimes/pure_arm_compute/src/internal/op/Neg.cc new file mode 100644 index 000000000..72fecf484 --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/op/Neg.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Neg.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + ifm_index = inputs[0]; +} + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.h b/runtimes/pure_arm_compute/src/internal/op/Neg.h new file mode 100644 index 000000000..77507df3d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Neg.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_NEG_H__ +#define __INTERNAL_OP_NEG_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_NEG_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Node.h b/runtimes/pure_arm_compute/src/internal/op/Node.h index 3927c20f0..be1cbdb5b 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Node.h +++ b/runtimes/pure_arm_compute/src/internal/op/Node.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Node.h + * @brief This file contains struct of Node and NodeVisitor + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_NODE_H__ #define __INTERNAL_OP_NODE_H__ @@ -24,12 +30,26 @@ namespace tflite namespace op { +/** + * @brief Struct of operation NodeVisitor + */ struct NodeVisitor; +/** + * @brief Struct of operation Node + */ struct Node { + /** + * @brief Destroy the Node object for operation + */ virtual ~Node() = default; + /** + * @brief Function for accepting node for operation + * @param [in] v Node visitor for invoking visit function of operation + * @return N/A + */ virtual void accept(NodeVisitor &&) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h index 6d8d10af0..0c1a4001d 100644 --- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h +++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file NodeVisitor.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines NodeVisitor + */ + #ifndef __INTERNAL_OP_NODE_VISITOR_H__ #define __INTERNAL_OP_NODE_VISITOR_H__ @@ -26,6 +32,7 @@ #include "internal/op/Dequantize.h" #include "internal/op/MaxPool2D.h" #include "internal/op/AvgPool2D.h" +#include "internal/op/ArgMax.h" #include "internal/op/Concat.h" #include "internal/op/Reshape.h" #include "internal/op/ResizeBilinear.h" @@ -33,9 +40,11 @@ #include "internal/op/FullyConnected.h" #include "internal/op/Softmax.h" #include "internal/op/ReduceMax.h" +#include "internal/op/ReduceMin.h" #include "internal/op/Cast.h" #include "internal/op/TopKV2.h" #include "internal/op/Gather.h" +#include "internal/op/PReLU.h" #include "internal/op/ReLU.h" #include "internal/op/ReLU1.h" #include "internal/op/ReLU6.h" @@ -49,13 +58,30 @@ #include "internal/op/Floor.h" #include "internal/op/Split.h" #include "internal/op/RSQRT.h" +#include "internal/op/SQRT.h" #include "internal/op/Pad.h" 
#include "internal/op/SpaceToDepth.h" +#include "internal/op/SpaceToBatchND.h" #include "internal/op/L2Pool2D.h" #include "internal/op/EmbeddingLookup.h" #include "internal/op/HashtableLookup.h" #include "internal/op/L2Normalization.h" #include "internal/op/SquaredDifference.h" +#include "internal/op/LocalResponseNormalization.h" +#include "internal/op/DepthToSpace.h" +#include "internal/op/Unpack.h" +#include "internal/op/Neg.h" +#include "internal/op/Exp.h" +#include "internal/op/ReduceSum.h" +#include "internal/op/Equal.h" +#include "internal/op/BatchToSpaceNd.h" +#include "internal/op/TransposeConv.h" +#include "internal/op/Pack.h" +#include "internal/op/Abs.h" +#include "internal/op/NotEqual.h" +#include "internal/op/LogicalAnd.h" +#include "internal/op/LogicalNot.h" +#include "internal/op/LogicalOr.h" namespace internal { @@ -64,54 +90,400 @@ namespace tflite namespace op { +/** + * @brief Struct to define visitor for operation Nodes + */ struct NodeVisitor { + /** + * @brief Destruct NodeVisitor object with default + */ virtual ~NodeVisitor() = default; + /** + * @brief Visit an Add node + * @param[in] node Add node to visit + * @return N/A + */ virtual void visit(const Add::Node &) = 0; + /** + * @brief Visit a Mul node + * @param[in] node Mul node to visit + * @return N/A + */ virtual void visit(const Sub::Node &) = 0; + /** + * @brief Visit a Mul node + * @param[in] node Mul node to visit + * @return N/A + */ virtual void visit(const Mul::Node &) = 0; + /** + * @brief Visit a Div node + * @param[in] node Div node to visit + * @return N/A + */ virtual void visit(const Div::Node &) = 0; + /** + * @brief Visit a Conv2D node with implicit padding + * @param[in] node Conv2D node to visit + * @return N/A + */ virtual void visit(const Conv2D::Implicit::Node &) = 0; + /** + * @brief Visit a Conv2D node with explicit padding + * @param[in] node Conv2D node to visit + * @return N/A + */ virtual void visit(const Conv2D::Explicit::Node &) = 0; + /** + * @brief Visit 
a DepthwiseConv2D node with implicit padding + * @param[in] node DepthwiseConv2D node to visit + * @return N/A + */ virtual void visit(const DepthwiseConv2D::Implicit::Node &) = 0; + /** + * @brief Visit a DepthwiseConv2D node with explicit padding + * @param[in] node DepthwiseConv2D node to visit + * @return N/A + */ virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0; + /** + * @brief Visit a Dequantize node + * @param[in] node Dequantize node to visit + * @return N/A + */ virtual void visit(const Dequantize::Node &) = 0; + /** + * @brief Visit a MaxPool2D node with implicit padding + * @param[in] node MaxPool2D node to visit + * @return N/A + */ virtual void visit(const MaxPool2D::Implicit::Node &) = 0; + /** + * @brief Visit a MaxPool2D node with explicit padding + * @param[in] node MaxPool2D node to visit + * @return N/A + */ virtual void visit(const MaxPool2D::Explicit::Node &) = 0; + /** + * @brief Visit an AvgPool2D node with implicit padding + * @param[in] node AvgPool2D node to visit + * @return N/A + */ virtual void visit(const AvgPool2D::Implicit::Node &) = 0; + /** + * @brief Visit an AvgPool2D node with explicit padding + * @param[in] node AvgPool2D node to visit + * @return N/A + */ virtual void visit(const AvgPool2D::Explicit::Node &) = 0; + /** + * @brief Visit a Concat node + * @param[in] node Concat node to visit + * @return N/A + */ virtual void visit(const Concat::Node &) = 0; + /** + * @brief Visit a ArgMax node + * @param[in] node ArgMax node to visit + * @return N/A + */ + virtual void visit(const ArgMax::Node &) = 0; + /** + * @brief Visit an Reshape node + * @param[in] node Reshape node to visit + * @return N/A + */ virtual void visit(const Reshape::Node &) = 0; + /** + * @brief Visit an ResizeBilinear node + * @param[in] node ResizeBilinear node to visit + * @return N/A + */ virtual void visit(const ResizeBilinear::Node &) = 0; + /** + * @brief Visit a StridedSlice node + * @param[in] node StridedSlice node to visit + * 
@return N/A + */ virtual void visit(const StridedSlice::Node &) = 0; + /** + * @brief Visit a FullyConnected node + * @param[in] node FullyConnected node to visit + * @return N/A + */ virtual void visit(const FullyConnected::Node &) = 0; + /** + * @brief Visit a Softmax node + * @param[in] node Softmax node to visit + * @return N/A + */ virtual void visit(const Softmax::Node &) = 0; + /** + * @brief Visit a ReduceMax node + * @param[in] node ReduceMax node to visit + * @return N/A + */ virtual void visit(const ReduceMax::Node &) = 0; + /** + * @brief Visit a ReduceMin node + * @param[in] node ReduceMin node to visit + * @return N/A + */ + virtual void visit(const ReduceMin::Node &) = 0; + /** + * @brief Visit a Cast node + * @param[in] node Cast node to visit + * @return N/A + */ virtual void visit(const Cast::Node &) = 0; + /** + * @brief Visit a TopKV2 node + * @param[in] node TopKV2 node to visit + * @return N/A + */ virtual void visit(const TopKV2::Node &) = 0; + /** + * @brief Visit a Gather node + * @param[in] node Gather node to visit + * @return N/A + */ virtual void visit(const Gather::Node &) = 0; + /** + * @brief Visit an PReLU node + * @param[in] node PReLU node to visit + * @return N/A + */ + virtual void visit(const PReLU::Node &) = 0; + /** + * @brief Visit an ReLU node + * @param[in] node Relu node to visit + * @return N/A + */ virtual void visit(const ReLU::Node &) = 0; + /** + * @brief Visit a ReLU1 node + * @param[in] node ReLU1 node to visit + * @return N/A + */ virtual void visit(const ReLU1::Node &) = 0; + /** + * @brief Visit a ReLU6 node + * @param[in] node ReLU6 node to visit + * @return N/A + */ virtual void visit(const ReLU6::Node &) = 0; + /** + * @brief Visit a Tanh node + * @param[in] node Tanh node to visit + * @return N/A + */ virtual void visit(const Tanh::Node &) = 0; + /** + * @brief Visit a Squeeze node + * @param[in] node Squeeze node to visit + * @return N/A + */ virtual void visit(const Squeeze::Node &) = 0; + /** + * @brief 
Visit an Logistic node + * @param[in] node Logistic node to visit + * @return N/A + */ virtual void visit(const Logistic::Node &) = 0; + /** + * @brief Visit a Mean node + * @param[in] node Mean node to visit + * @return N/A + */ virtual void visit(const Mean::Node &) = 0; + /** + * @brief Visit an RNN node + * @param[in] node RNN node to visit + * @return N/A + */ virtual void visit(const RNN::Node &) = 0; + /** + * @brief Visit a Transpose node + * @param[in] node Transpose node to visit + * @return N/A + */ virtual void visit(const Transpose::Node &) = 0; + /** + * @brief Visit an LSTM node + * @param[in] node LSTM node to visit + * @return N/A + */ virtual void visit(const LSTM::Node &) = 0; + /** + * @brief Visit a Floor node + * @param[in] node Floor node to visit + * @return N/A + */ virtual void visit(const Floor::Node &) = 0; + /** + * @brief Visit a Split node + * @param[in] node Split node to visit + * @return N/A + */ virtual void visit(const Split::Node &) = 0; + /** + * @brief Visit an RSQRT node + * @param[in] node RSQRT node to visit + * @return N/A + */ virtual void visit(const RSQRT::Node &) = 0; + /** + * @brief Visit an SQRT node + * @param[in] node SQRT node to visit + * @return N/A + */ + virtual void visit(const SQRT::Node &) = 0; + /** + * @brief Visit a Pad node + * @param[in] node Pad node to visit + * @return N/A + */ virtual void visit(const Pad::Node &) = 0; + /** + * @brief Visit a SpaceToDepth node + * @param[in] node SpaceToDepth node to visit + * @return N/A + */ virtual void visit(const SpaceToDepth::Node &) = 0; + /** + * @brief Visit a SpaceToBatchND node + * @param[in] node SpaceToBatchND node to visit + * @return N/A + */ + virtual void visit(const SpaceToBatchND::Node &) = 0; + /** + * @brief Visit an L2Pool2D node with implicit padding + * @param[in] node L2Pool2D node to visit + * @return N/A + */ virtual void visit(const L2Pool2D::Implicit::Node &) = 0; + /** + * @brief Visit an L2Pool2D node with explicit padding + * 
@param[in] node L2Pool2D node to visit + * @return N/A + */ virtual void visit(const L2Pool2D::Explicit::Node &) = 0; + /** + * @brief Visit an EmbeddingLookup node + * @param[in] node EmbeddingLookup node to visit + * @return N/A + */ virtual void visit(const EmbeddingLookup::Node &) = 0; + /** + * @brief Visit a HashtableLookup node + * @param[in] node HashtableLookup node to visit + * @return N/A + */ virtual void visit(const HashtableLookup::Node &) = 0; + /** + * @brief Visit an L2Normalization node + * @param[in] node L2Normalization node to visit + * @return N/A + */ virtual void visit(const L2Normalization::Node &) = 0; + /** + * @brief Visit a SquaredDifference node + * @param[in] node SquaredDifference node to visit + * @return N/A + */ virtual void visit(const SquaredDifference::Node &) = 0; + /** + * @brief Visit a LocalResponseNormalization node + * @param[in] node LocalResponseNormalization node to visit + * @return N/A + */ + virtual void visit(const LocalResponseNormalization::Node &) = 0; + /** + * @brief Visit a DepthToSpace node + * @param[in] node DepthToSpace node to visit + * @return N/A + */ + virtual void visit(const DepthToSpace::Node &) = 0; + /** + * @brief Visit a Unpack node + * @param[in] node Unpack node to visit + * @return N/A + */ + virtual void visit(const Unpack::Node &) = 0; + /** + * @brief Visit a Neg node + * @param[in] node Neg node to visit + * @return N/A + */ + virtual void visit(const Neg::Node &) = 0; + /** + * @brief Visit a Exp node + * @param[in] node Exp node to visit + * @return N/A + */ + virtual void visit(const Exp::Node &) = 0; + /** + * @brief Visit a ReduceSum node + * @param[in] node ReduceSum node to visit + * @return N/A + */ + virtual void visit(const ReduceSum::Node &) = 0; + /** + * @brief Visit a Equal node + * @param[in] node Equal node to visit + * @return N/A + */ + virtual void visit(const Equal::Node &) = 0; + /** + * @brief Visit a BatchToSpaceNd node + * @param[in] node BatchToSpaceNd node to 
visit + * @return N/A + */ + virtual void visit(const BatchToSpaceNd::Node &) = 0; + /** + * @brief Visit a TransposeConv node + * @param[in] node TransposeConv node to visit + * @return N/A + */ + virtual void visit(const TransposeConv::Node &) = 0; + /** + * @brief Visit a Pack node + * @param[in] node Pack node to visit + * @return N/A + */ + virtual void visit(const Pack::Node &) = 0; + /** + * @brief Visit a Abs node + * @param[in] node Abs node to visit + * @return N/A + */ + virtual void visit(const Abs::Node &) = 0; + /** + * @brief Visit a NotEqual node + * @param[in] node NotEqual node to visit + * @return N/A + */ + virtual void visit(const NotEqual::Node &) = 0; + /** + * @brief Visit a LogicalAnd node + * @param[in] node LogicalAnd node to visit + * @return N/A + */ + virtual void visit(const LogicalAnd::Node &) = 0; + /** + * @brief Visit a LogicalNot node + * @param[in] node LogicalNot node to visit + * @return N/A + */ + virtual void visit(const LogicalNot::Node &) = 0; + /** + * @brief Visit a LogicalOr node + * @param[in] node LogicalOr node to visit + * @return N/A + */ + virtual void visit(const LogicalOr::Node &) = 0; }; } // namespace op diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc new file mode 100644 index 000000000..2906e214b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/NotEqual.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.h b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h new file mode 100644 index 000000000..0d6130948 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_NOT_EQUAL_H__ +#define __INTERNAL_OP_NOT_EQUAL_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_NOT_EQUAL_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.cc b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc new file mode 100644 index 000000000..25b06505b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/PReLU.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> alpha Tensor Index + ifm_index = inputs[0]; + alpha_index = inputs[1]; +} + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.h b/runtimes/pure_arm_compute/src/internal/op/PReLU.h new file mode 100644 index 000000000..ae754abb4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ReLU.h + * @brief This file contains accept function and params for ReLU operation + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __INTERNAL_OP_PRELU_H__ +#define __INTERNAL_OP_PRELU_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +/** + * @brief Struct of PReLU operation's param + */ +struct Param +{ + int32_t ofm_index; /**< Output format index */ + + int32_t ifm_index; /**< Input format index */ + int32_t alpha_index; /**< Alpha input index */ + + /** + * @brief Construct a new Param object for ReLU as default + */ + Param() = default; + + /** + * @brief Construct a new Param object for PReLU with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to define operation node for PReLU + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object for PReLU with param + * @param [in] param Parameters for Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destroy the Node object for PReLU + */ + virtual ~Node() = default; + +public: + /** + * @brief Get parameters for PReLU + * @return Parameters of PReLU + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Function for accepting node for PReLU + * @param [in] v Node visitor for invoking visit function of PReLU + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PRELU_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.cc 
b/runtimes/pure_arm_compute/src/internal/op/Pack.cc new file mode 100644 index 000000000..73f89b840 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Pack.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Pack.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(outputCount == 1); + + // Each input should be interpreted as follows: + // + // 0 .. 
n - 3 -> Input Tensor Index + // n - 2 -> Input Tensor counts (will be ignored) + // n - 1 -> Input Axis Index + ofm_index = outputs[0]; + axis_index = inputs[inputCount - 1]; + // last input is axis along which packing is required + for (uint32_t n = 0; n < inputCount - 2; ++n) + { + ifm_indexes.emplace_back(inputs[n]); + } +} + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.h b/runtimes/pure_arm_compute/src/internal/op/Pack.h new file mode 100644 index 000000000..c5de01bd8 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Pack.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_PACK_H__ +#define __INTERNAL_OP_PACK_H__ + +#include "internal/op/Node.h" +#include <vector> + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +struct Param +{ + int32_t ofm_index; + // There are N+1 inputs, 0 to N-1 are tensors of same shape + // Nth input is axis index along which stack is needed to be done. 
+ std::vector<int32_t> ifm_indexes; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PACK_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.cc b/runtimes/pure_arm_compute/src/internal/op/Pad.cc index 24d08bf36..00938242b 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Pad.cc +++ b/runtimes/pure_arm_compute/src/internal/op/Pad.cc @@ -1,63 +1,63 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/Pad.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 2 && outputCount == 1);
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- // 1 -> paddings
- ifm_index = inputs[0];
- paddings_index = inputs[1];
-}
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Pad.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> paddings + ifm_index = inputs[0]; + paddings_index = inputs[1]; +} +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.h b/runtimes/pure_arm_compute/src/internal/op/Pad.h index e3ddae44c..68752a10e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Pad.h +++ b/runtimes/pure_arm_compute/src/internal/op/Pad.h @@ -1,69 +1,107 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_PAD_H__
-#define __INTERNAL_OP_PAD_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-struct Param
-{
- int32_t ifm_index;
- int32_t paddings_index;
- int32_t ofm_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param ¶m) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param ¶m(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_PAD_H_
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Pad.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Pad node + */ + +#ifndef __INTERNAL_OP_PAD_H__ +#define __INTERNAL_OP_PAD_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +/** + * @brief Struct to manipulate parameter for Pad operation + */ +struct Param +{ + int32_t ifm_index; //!< index for input + int32_t paddings_index; //!< index for padding + int32_t ofm_index; //!< index for output + + /** + * @brief Default Constructor + */ + Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to define Pad Operation + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new RNN Node object + * @param param Parameter for RNN Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Default Destructor + */ + virtual ~Node() = default; + +public: + /** + * @brief Get parameter + * @return Param reference + */ + const 
Param ¶m(void) const { return _param; } + +public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; //!< parameter for Pad node +}; + +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PAD_H_ diff --git a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h index e384b27f2..e39d60241 100644 --- a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h +++ b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file RSQRT.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::RSQRT::Param struct + * and internal::tflite::op::RSQRT::Node class + */ #ifndef __INTERNAL_OP_RSQRT_H__ #define __INTERNAL_OP_RSQRT_H__ @@ -30,31 +36,61 @@ namespace op namespace RSQRT { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual 
~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU.h b/runtimes/pure_arm_compute/src/internal/op/ReLU.h index 64dcf2e14..aaa39b523 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReLU.h + * @brief This file contains accept function and params for ReLU operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_RELU_H__ #define __INTERNAL_OP_RELU_H__ @@ -30,31 +36,64 @@ namespace op namespace ReLU { +/** + * @brief Struct of ReLU operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ + /** + * @brief Construct a new Param object for ReLU as default + */ Param() = default; + + /** + * @brief Construct a new Param object for ReLU with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for ReLU + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for ReLU with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for ReLU + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for ReLU + * @return Parameters of ReLU + */ const Param ¶m(void) const { 
return _param; } public: + /** + * @brief Function for accepting node for ReLU + * @param [in] v Node visitor for invoking visit function of ReLU + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h index 997a9faff..330445af8 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReLU1.h + * @brief This file contains accept function and params for ReLU1 operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_RELU1_H__ #define __INTERNAL_OP_RELU1_H__ @@ -30,31 +36,64 @@ namespace op namespace ReLU1 { +/** + * @brief Struct of ReLU1 operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ + /** + * @brief Construct a new Param object for ReLU1 as default + */ Param() = default; + + /** + * @brief Construct a new Param object for ReLU1 with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for ReLU1 + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for ReLU1 with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for ReLU1 + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for ReLU1 + * @return Parameters of ReLU1 + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for 
accepting node for ReLU1 + * @param [in] v Node visitor for invoking visit function of ReLU1 + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h index 77c55b64c..6fc2c24fe 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file ReLU6.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReLU6 class + */ #ifndef __INTERNAL_OP_RELU6_H__ #define __INTERNAL_OP_RELU6_H__ @@ -30,31 +35,61 @@ namespace op namespace ReLU6 { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /** Index of output feature map */ + int32_t ifm_index; /** Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff 
--git a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h index e3278aacf..77d8bd869 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReduceMax.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReduceMax::Param struct + * and internal::tflite::op::ReduceMax::Node class + */ #ifndef __INTERNAL_OP_REDUCEMAX_H__ #define __INTERNAL_OP_REDUCEMAX_H__ @@ -30,32 +36,63 @@ namespace op namespace ReduceMax { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t axis_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + int32_t axis_index; /**< Index of axis */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; 
private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc new file mode 100644 index 000000000..72b6079d4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/ReduceMin.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h new file mode 100644 index 000000000..5dd82ec43 --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ReduceMin.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReduceMin::Param struct + * and internal::tflite::op::ReduceMin::Node class + */ +#ifndef __INTERNAL_OP_REDUCEMIN_H__ +#define __INTERNAL_OP_REDUCEMIN_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t ofm_index; /**< Index of output feature map */ + + int32_t ifm_index; /**< Index of input feature map */ + int32_t axis_index; /**< Index of axis */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a 
Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_REDUCEMIN_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc new file mode 100644 index 000000000..4d83c1734 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/ReduceSum.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h new file mode 100644 index 000000000..9c661f63a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_REDUCESUM_H__ +#define __INTERNAL_OP_REDUCESUM_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_REDUCESUM_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Reshape.h b/runtimes/pure_arm_compute/src/internal/op/Reshape.h index ab77ade8c..7152eaece 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Reshape.h +++ b/runtimes/pure_arm_compute/src/internal/op/Reshape.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Reshape.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Reshape node + */ + #ifndef __INTERNAL_OP_RESHAPE_H__ #define __INTERNAL_OP_RESHAPE_H__ @@ -30,36 +36,68 @@ namespace op namespace Reshape { +/** + * @brief Struct to manipulate parameter for Reshape operation + */ struct Param { - int32_t output_index; + int32_t output_index; //!< index for output feature map - int32_t input_index; - int32_t shape_index; + int32_t input_index; //!< index for input feature map + int32_t shape_index; //!< index for shape + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Reshape Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Reshape Node object + * @param param Parameter for Reshape Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Reshape node }; } // namespace Reshape diff --git a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h index bf216b75f..f2eab4aaf 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h +++ b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h @@ -14,6 +14,12 @@ * 
limitations under the License. */ +/** + * @file ResizeBilinear.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ResizeBilinear::Param struct + * and internal::tflite::op::ResizeBilinear::Node class + */ #ifndef __INTERNAL_OP_RESIZE_BILINEAR_H__ #define __INTERNAL_OP_RESIZE_BILINEAR_H__ @@ -30,33 +36,64 @@ namespace op namespace ResizeBilinear { +/** + * @brief Struct to have indexes for ResizeBilinear operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t height_index; - int32_t width_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + int32_t height_index; /**< Index of height */ + int32_t width_index; /**< Index of width */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an ResizeBilinear operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Rnn.h b/runtimes/pure_arm_compute/src/internal/op/Rnn.h index c436a0987..7b2a10843 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Rnn.h +++ b/runtimes/pure_arm_compute/src/internal/op/Rnn.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Rnn.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines RNN node + */ + #ifndef __INTERNAL_OP_RNN_H__ #define __INTERNAL_OP_RNN_H__ @@ -30,41 +36,73 @@ namespace op namespace RNN { +/** + * @brief Struct to manipulate parameter for RNN operation + */ struct Param { - int32_t output_index; - int32_t hidden_state_out_index; + int32_t output_index; //!< index for output + int32_t hidden_state_out_index; //!< index for hidden state output - int32_t input_index; - int32_t weights_index; - int32_t recurrent_weights_index; - int32_t bias_index; - int32_t hidden_state_in_index; - int32_t fused_activation_index; + int32_t input_index; //!< index for input + int32_t weights_index; //!< index for weight + int32_t recurrent_weights_index; //!< index for recurrent weights + int32_t bias_index; //!< index for bias + int32_t hidden_state_in_index; //!< index for hidden state input + int32_t fused_activation_index; //!< index for fused activation + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define RNN Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new RNN Node object + * @param param Parameter for RNN Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a 
NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for RNN node }; } // namespace RNN diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.cc b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc new file mode 100644 index 000000000..70ce42e9c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/SQRT.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + input_index = inputs[0]; +} + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.h b/runtimes/pure_arm_compute/src/internal/op/SQRT.h new file mode 100644 index 000000000..85dfb97a7 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file SQRT.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::SQRT::Param struct + * and internal::tflite::op::SQRT::Node class + */ +#ifndef __INTERNAL_OP_SQRT_H__ +#define __INTERNAL_OP_SQRT_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t output_index; /**< Index of output feature map */ + + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_SQRT_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Softmax.h b/runtimes/pure_arm_compute/src/internal/op/Softmax.h index 746f6b4e6..6e631af5f 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Softmax.h +++ b/runtimes/pure_arm_compute/src/internal/op/Softmax.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Softmax.h + * @brief This file contains accept function and params for Softmax operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_SOFTMAX_H__ #define __INTERNAL_OP_SOFTMAX_H__ @@ -30,32 +36,65 @@ namespace op namespace Softmax { +/** + * @brief Struct of Softmax operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t scale_index; + int32_t input_index; /**< Input index */ + int32_t scale_index; /**< Scale index */ + /** + * @brief Construct a new Param object for Softmax as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Softmax with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Softmax + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Softmax with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Softmax + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Softmax + * @return Parameters of Softmax + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Softmax + * @param [in] v Node visitor for invoking visit function of Softmax + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc 
b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc new file mode 100644 index 000000000..9ab026cf4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/SpaceToBatchND.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + // 2 -> Padding size Index + input_index = inputs[0]; + block_size_index = inputs[1]; + padding_size_index = inputs[2]; +} + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h new file mode 100644 index 
000000000..650d068f4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_SPACETOBATCHND_H__ +#define __INTERNAL_OP_SPACETOBATCHND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + int32_t padding_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_SPACETOBATCHND_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h index 81bfe5246..2e624006a 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h @@ 
-14,6 +14,12 @@ * limitations under the License. */ +/** + * @file SpaceToDepth.h + * @brief This file contains accept function and params for SpaceToDepth operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_SPACETODEPTH_H__ #define __INTERNAL_OP_SPACETODEPTH_H__ @@ -30,32 +36,65 @@ namespace op namespace SpaceToDepth { +/** + * @brief Struct of SpaceToDepth operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t block_size_index; + int32_t input_index; /**< Input index */ + int32_t block_size_index; /**< Block size index */ + /** + * @brief Construct a new Param object for SpaceToDepth as default + */ Param() = default; + + /** + * @brief Construct a new Param object for SpaceToDepth with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for SpaceToDepth + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for SpaceToDepth with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for SpaceToDepth + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for SpaceToDepth + * @return Parameters of SpaceToDepth + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for SpaceToDepth + * @param [in] v Node visitor for invoking visit function of SpaceToDepth + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Split.h b/runtimes/pure_arm_compute/src/internal/op/Split.h index 8bea1000d..b2c6c2fd1 
100644 --- a/runtimes/pure_arm_compute/src/internal/op/Split.h +++ b/runtimes/pure_arm_compute/src/internal/op/Split.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Split.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Split node + */ + #ifndef __INTERNAL_OP_SPLIT_H__ #define __INTERNAL_OP_SPLIT_H__ @@ -31,36 +37,68 @@ namespace op namespace Split { +/** + * @brief Struct to manipulate parameter for Split operation + */ struct Param { - int32_t axis_index; - int32_t ifm_index; + int32_t axis_index; //!< index for axis + int32_t ifm_index; //!< index for input feature map - std::vector<int32_t> ofm_indexes; + std::vector<int32_t> ofm_indexes; //!< index for output feature map + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Split Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Split Node object + * @param param Parameter for Split Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Split node }; } // namespace Split diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc index 
c2c4f7242..f6c8bc5df 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc +++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include "internal/op/SquaredDifference.h" #include "internal/op/NodeVisitor.h" @@ -30,7 +46,7 @@ namespace SquaredDifference Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs) { - assert(inputCount == 3 && outputCount == 1); + assert(inputCount == 2 && outputCount == 1); ofm_index = outputs[0]; @@ -38,10 +54,8 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, // // 0 -> LHS Tensor Index // 1 -> RHS Tensor Index - // 2 -> Activation Index lhs_index = inputs[0]; rhs_index = inputs[1]; - activation_index = inputs[2]; } } // namespace SquaredDifference diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h index 7760405b9..ecbb03209 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h +++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file SquaredDifference.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::SquaredDifference::Param struct + * and internal::tflite::op::SquaredDifference::Node class + */ #ifndef __INTERNAL_OP_SQUAREDDIFFERENCE_H__ #define __INTERNAL_OP_SQUAREDDIFFERENCE_H__ @@ -14,33 +36,62 @@ namespace op namespace SquaredDifference { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t lhs_index; /**< Index of lhs */ + int32_t rhs_index; /**< Index of rhs */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default 
+ */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h index e871067f5..d5f36f85f 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h +++ b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Squeeze.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Squeeze::Param struct + * and internal::tflite::op::Squeeze::Node class + */ #ifndef __INTERNAL_OP_SQUEEZE_H__ #define __INTERNAL_OP_SQUEEZE_H__ @@ -30,32 +36,64 @@ namespace op namespace Squeeze { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; - int32_t dims_index_optional = -1; // optional param. default is -1 + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + // optional param. 
default is -1 + int32_t dims_index_optional = -1; /**< Index of dims */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h index 26bb81a8c..21dbb9e68 100644 --- a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h +++ b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file StridedSlice.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines StridedSlice node + */ + #ifndef __INTERNAL_OP_STRIDEDSLICE_H__ #define __INTERNAL_OP_STRIDEDSLICE_H__ @@ -30,41 +36,73 @@ namespace op namespace StridedSlice { +/** + * @brief Struct to manipulate parameter for StridedSlice operation + */ struct Param { - int32_t outputData_index; + int32_t outputData_index; //!< index for output data - int32_t inputData_index; - int32_t startData_index; - int32_t endData_index; - int32_t stridesData_index; - int32_t beginMask_index; - int32_t endMask_index; - int32_t shrinkAxisMask_index; + int32_t inputData_index; //!< index for input data + int32_t startData_index; //!< index where slicing start from + int32_t endData_index; //!< index where slicing ends to + int32_t stridesData_index; //!< index for stride value + int32_t beginMask_index; //!< index for beginmask + int32_t endMask_index; //!< index for endmask + int32_t shrinkAxisMask_index; //!< index for shrink axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define StridedSlice Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new StridedSlice Node object + * @param param Parameter for StridedSlice Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + 
*/ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for StridedSlice node }; } // namespace StridedSlice diff --git a/runtimes/pure_arm_compute/src/internal/op/Sub.h b/runtimes/pure_arm_compute/src/internal/op/Sub.h index 3da271029..864359d1e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Sub.h +++ b/runtimes/pure_arm_compute/src/internal/op/Sub.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Sub.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines SUB Node + */ + #ifndef __INTERNAL_OP_SUB_H__ #define __INTERNAL_OP_SUB_H__ @@ -30,37 +36,69 @@ namespace op namespace Sub { +/** + * @brief Struct to manipulate parameters for SUB + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output feature map - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t lhs_index; //!< index for left-hand side + int32_t rhs_index; //!< index for right-hand side + int32_t activation_index; //!< index for activation function + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define SUB Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Sub Node object + * @param param Parameter for Sub Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] 
v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for SUB node }; } // namespace Sub diff --git a/runtimes/pure_arm_compute/src/internal/op/Tanh.h b/runtimes/pure_arm_compute/src/internal/op/Tanh.h index f5a9f102e..fd87297f1 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Tanh.h +++ b/runtimes/pure_arm_compute/src/internal/op/Tanh.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Tanh.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines TANH node + */ + #ifndef __INTERNAL_OP_TANH_H__ #define __INTERNAL_OP_TANH_H__ @@ -30,35 +36,67 @@ namespace op namespace Tanh { +/** + * @brief Struct to manipulate parameter for hyperbolic tangent operation + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output feature map - int32_t ifm_index; + int32_t ifm_index; //!< index for input feature map + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Tanh Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Tanh Node object + * @param param Parameter for Tanh Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const 
Param _param; //!< parameter for Tanh node }; } // namespace Tanh diff --git a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h index 79bbd1f2e..02b7827e9 100644 --- a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h +++ b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file TopKV2.h + * @brief This file contains accept function and params for TopKV2 operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_TOPKV2_H__ #define __INTERNAL_OP_TOPKV2_H__ @@ -30,33 +36,66 @@ namespace op namespace TopKV2 { +/** + * @brief Struct of TopKV2 operation's param + */ struct Param { - int32_t outputValues_index; - int32_t outputIndices_index; + int32_t outputValues_index; /**< Output values index */ + int32_t outputIndices_index; /**< Output indices index */ - int32_t inputData_index; - int32_t k_index; + int32_t inputData_index; /**< Input data index */ + int32_t k_index; /**< K value index */ + /** + * @brief Construct a new Param object for TopKV2 as default + */ Param() = default; + + /** + * @brief Construct a new Param object for TopKV2 with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for TopKV2 + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for TopKV2 with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for TopKV2 + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for TopKV2 + * @return Parameters of TopKV2 + */ const Param ¶m(void) const { return _param; } 
public: + /** + * @brief Function for accepting node for TopKV2 + * @param [in] v Node visitor for invoking visit function of TopKV2 + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Transpose.h b/runtimes/pure_arm_compute/src/internal/op/Transpose.h index dac2ef8f2..bb01bf322 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Transpose.h +++ b/runtimes/pure_arm_compute/src/internal/op/Transpose.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Transpose.h + * @brief This file contains accept function and params for Transpose operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_TRANSPOSE_H__ #define __INTERNAL_OP_TRANSPOSE_H__ @@ -30,32 +36,65 @@ namespace op namespace Transpose { +/** + * @brief Struct of Transpose operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t permu_index; + int32_t ifm_index; /**< Input format index */ + int32_t permu_index; /**< Permutation index */ + /** + * @brief Construct a new Param object for Transpose as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Transpose with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Transpose + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Transpose with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Transpose + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Transpose + * 
@return Parameters of Transpose + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Transpose + * @param [in] v Node visitor for invoking visit function of Transpose + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc new file mode 100644 index 000000000..502eff525 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/TransposeConv.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 6 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Output Shape Index + // 1 -> Weights Index + // 2 -> Input Tensor Index + // 3 -> Padding Type + // 4 -> Stride width + // 5 -> Stride height + + op_shape_index = inputs[0]; + ker_index = inputs[1]; + ifm_index = inputs[2]; + padding_index = inputs[3]; + hstride_index = inputs[4]; + vstride_index = inputs[5]; +} + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h new file mode 100644 index 000000000..b0122f82d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_TRANSPOSECONV_H__ +#define __INTERNAL_OP_TRANSPOSECONV_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +struct Param +{ + int32_t ofm_index; + + int32_t op_shape_index; + int32_t ker_index; + int32_t ifm_index; + int32_t padding_index; + int32_t hstride_index; + int32_t vstride_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_TRANSPOSECONV_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.cc b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc new file mode 100644 index 000000000..a1be0280c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Unpack.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ +// There are three inputs: tensor which is to be unpacked, +// axis along which tensor needs to be unpacked +// and number of splits along the axis. + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3); + + ifm_index = inputs[0]; + + for (uint32_t n = 0; n < outputCount; ++n) + { + ofm_indexes.emplace_back(outputs[n]); + } + num_split_index = inputs[1]; + axis_index = inputs[2]; +} + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.h b/runtimes/pure_arm_compute/src/internal/op/Unpack.h new file mode 100644 index 000000000..575e3d024 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_UNPACK_H__ +#define __INTERNAL_OP_UNPACK_H__ + +#include "internal/op/Node.h" + +#include <cstdint> +#include <vector> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ + +struct Param +{ + int32_t ifm_index; + int32_t axis_index; + int32_t num_split_index; + // There are N outputs after Unpacking Input Tensor along axis + std::vector<int32_t> ofm_indexes; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_UNPACK_H__ diff --git a/runtimes/pure_arm_compute/src/logging.h b/runtimes/pure_arm_compute/src/logging.h index 61b434eda..914b63057 100644 --- a/runtimes/pure_arm_compute/src/logging.h +++ b/runtimes/pure_arm_compute/src/logging.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file logging.h + * @brief This file contains Context class for logging. 
+ * @ingroup COM_AI_RUNTIME + */ + #ifndef __PURE_ARM_COMPUTE_LOGGING_H__ #define __PURE_ARM_COMPUTE_LOGGING_H__ @@ -22,9 +28,15 @@ namespace logging { +/** + * @brief class to define Context for logging + */ class Context { public: + /** + * @brief Construct default + */ Context() : _enabled{false} { auto env = std::getenv("PURE_ARM_COMPUTE_LOG_ENABLE"); @@ -36,12 +48,21 @@ public: } public: + /** + * @brief Get @c true if PURE_ARM_COMPUTE_LOG_ENABLE has been set as environment value, otherwise + * @c false + * @return @c true if PURE_ARM_COMPUTE_LOG_ENABLE has been set as environment value, otherwise @c + * false + */ bool enabled(void) const { return _enabled; } private: bool _enabled; }; +/** + * @brief static Context class for logging + */ static Context ctx; } // namespace logging diff --git a/runtimes/pure_arm_compute/src/memory.cc b/runtimes/pure_arm_compute/src/memory.cc index 2a9294f86..9e999661a 100644 --- a/runtimes/pure_arm_compute/src/memory.cc +++ b/runtimes/pure_arm_compute/src/memory.cc @@ -18,7 +18,7 @@ #include <sys/mman.h> #include <memory> -#include "nnfw/std/memory.h" +#include "cpp14/memory.h" #include "memory.h" int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset, @@ -31,7 +31,7 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t // Use unique pointer to avoid memory leak std::unique_ptr<ANeuralNetworksMemory> memory_ptr = - nnfw::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset); + nnfw::cpp14::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset); if (memory_ptr == nullptr) { return ANEURALNETWORKS_OUT_OF_MEMORY; diff --git a/runtimes/pure_arm_compute/src/memory.h b/runtimes/pure_arm_compute/src/memory.h index 8bd43b0d6..ffac26ef6 100644 --- a/runtimes/pure_arm_compute/src/memory.h +++ b/runtimes/pure_arm_compute/src/memory.h @@ -14,20 +14,51 @@ * limitations under the License. 
*/ +/** + * @file memory.h + * @brief This file defines ANeuralNetworksMemory class for handling Memory NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __MEMORY_H__ #define __MEMORY_H__ #include <cstdint> +/** + * @brief struct to define Memory NNAPI + */ struct ANeuralNetworksMemory { public: + /** + * @brief Constructor with params + * @param [in] size The requested size in bytes + * @param [in] protect The desired memory protection for the mapping + * @param [in] fd The requested file descriptor + * @param [in] offset The offset to the beginning of the file of the area to map + */ ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset); + /** + * @brief Destructor default + */ ~ANeuralNetworksMemory(); public: + /** + * @brief Get size + * @return size + */ size_t size(void) const { return _size; } + /** + * @brief Get base pointer + * @return base pointer + */ uint8_t *base(void) { return _base; } + /** + * @brief Get base pointer + * @return const base pointer + */ const uint8_t *base(void) const { return _base; } private: diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc index 49ea59f17..2c4120d7a 100644 --- a/runtimes/pure_arm_compute/src/model.cc +++ b/runtimes/pure_arm_compute/src/model.cc @@ -602,6 +602,28 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_SPACE_TO_BATCH_ND: + { + using internal::tflite::op::SpaceToBatchND::Param; + using internal::tflite::op::SpaceToBatchND::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_BATCH_TO_SPACE_ND: + { + using internal::tflite::op::BatchToSpaceNd::Param; + using internal::tflite::op::BatchToSpaceNd::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case 
ANEURALNETWORKS_L2_POOL_2D: { // Input count is 7 for Implicit Padding @@ -675,6 +697,29 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION: + { + + using internal::tflite::op::LocalResponseNormalization::Param; + using internal::tflite::op::LocalResponseNormalization::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_DEPTH_TO_SPACE: + { + using internal::tflite::op::DepthToSpace::Param; + using internal::tflite::op::DepthToSpace::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } default: throw std::runtime_error{"Not supported operation"}; }; @@ -706,6 +751,18 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_REDUCE_MIN_EX: + { + using internal::tflite::op::ReduceMin::Param; + using internal::tflite::op::ReduceMin::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_TENSORFLOW_MAX_EX: { using internal::tflite::op::ReduceMax::Param; @@ -718,6 +775,53 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_PRELU_EX: + { + using internal::tflite::op::PReLU::Param; + using internal::tflite::op::PReLU::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_TRANSPOSE_CONV_EX: + { + using internal::tflite::op::TransposeConv::Param; + using internal::tflite::op::TransposeConv::Node; + + auto &operations = model->deref().operations(); + + 
operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_LOGICAL_AND_EX: + { + using internal::tflite::op::LogicalAnd::Param; + using internal::tflite::op::LogicalAnd::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_LOGICAL_OR_EX: + { + using internal::tflite::op::LogicalOr::Param; + using internal::tflite::op::LogicalOr::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_RSQRT_EX: { using internal::tflite::op::RSQRT::Param; @@ -730,6 +834,30 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_SQRT_EX: + { + using internal::tflite::op::SQRT::Param; + using internal::tflite::op::SQRT::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_EQUAL_EX: + { + using internal::tflite::op::Equal::Param; + using internal::tflite::op::Equal::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_SQUARED_DIFFERENCE_EX: { using internal::tflite::op::SquaredDifference::Param; @@ -778,6 +906,101 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_UNPACK_EX: + { + using internal::tflite::op::Unpack::Param; + using internal::tflite::op::Unpack::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_NEG_EX: + { + 
using internal::tflite::op::Neg::Param; + using internal::tflite::op::Neg::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_EXP_EX: + { + using internal::tflite::op::Exp::Param; + using internal::tflite::op::Exp::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_REDUCE_SUM_EX: + { + using internal::tflite::op::ReduceSum::Param; + using internal::tflite::op::ReduceSum::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_PACK_EX: + { + using internal::tflite::op::Pack::Param; + using internal::tflite::op::Pack::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_ABS_EX: + { + using internal::tflite::op::Abs::Param; + using internal::tflite::op::Abs::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_ARGMAX_EX: + { + using internal::tflite::op::ArgMax::Param; + using internal::tflite::op::ArgMax::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_NOT_EQUAL_EX: + { + using internal::tflite::op::NotEqual::Param; + using internal::tflite::op::NotEqual::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, 
outputs}); + + break; + } + default: throw std::runtime_error{"Not supported operation"}; } diff --git a/runtimes/pure_arm_compute/src/model.h b/runtimes/pure_arm_compute/src/model.h index a7e606201..8acc894f4 100644 --- a/runtimes/pure_arm_compute/src/model.h +++ b/runtimes/pure_arm_compute/src/model.h @@ -14,22 +14,52 @@ * limitations under the License. */ +/** + * @file model.h + * @brief This file contains ANeuralNetworksModel class for handling Model NNAPI such as + * ANeuralNetworksModel_create, ANeuralNetworksModel_addOperand + * @ingroup COM_AI_RUNTIME + */ + #ifndef __MODEL_H__ #define __MODEL_H__ #include "internal/Model.h" +/** + * @brief struct to express Model of NNAPI + */ struct ANeuralNetworksModel { public: + /** + * @brief Construct without params + */ ANeuralNetworksModel(); public: + /** + * @brief Get reference of internal::tflite::Model + * @return Reference of internal::tflite::Model + */ internal::tflite::Model &deref(void) { return *_model; } public: + /** + * @brief Release internal::tflite::Model pointer to param + * @param [in] model To get released internal::tflite::Model pointer + * @return N/A + */ void release(std::shared_ptr<const internal::tflite::Model> &model) { model = _model; } + /** + * @brief Get @c true if ANeuralNetworksModel_finish has been called, otherwise @c false + * @return @c true if ANeuralNetworksModel_finish has been called, otherwise @c false + */ bool isFinished() { return _isFinished == true; } + /** + * @brief Mark model process finished + * @return N/A + */ void markAsFinished() { _isFinished = true; } private: |