1 files changed, 206 insertions, 151 deletions
diff --git a/runtimes/neurun/src/backend/acl_cl/StageGenerator.cc b/runtimes/neurun/src/backend/acl_cl/StageGenerator.cc
index c63698fd8..89bbd7bd2 100644
--- a/runtimes/neurun/src/backend/acl_cl/StageGenerator.cc
+++ b/runtimes/neurun/src/backend/acl_cl/StageGenerator.cc
@@ -16,6 +16,8 @@
 
 #include "backend/acl_cl/StageGenerator.h"
 
+#include "kernel/acl_cl/CLFunction.h"
+
 #include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
 #include <arm_compute/runtime/CL/functions/CLPoolingLayer.h>
 #include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
@@ -25,20 +27,27 @@
 
 #include "kernel/acl_cl/ConcatLayer.h"
 
-#include "internal/Padding.h"
+#include "util/Padding.h"
 
-#include "graph/operand/Index.h"
+#include "model/operand/Index.h"
 
-#include "logging.h"
+#include "util/logging.h"
 
 #include "NeuralNetworks.h"
 
-#include "support/nnapi/Utils.h"
+#include "util/Utils.h"
 
 template <typename T> std::unique_ptr<T> make_layer(void) { return std::unique_ptr<T>{new T}; }
 
-::arm_compute::PadStrideInfo asPadStringInfo(const ::internal::Padding &padding,
-                                             const ::internal::Stride &stride)
+std::unique_ptr<::neurun::kernel::acl_cl::CLFunction> // wraps an ACL IFunction in a CLFunction
+make_cl_function(std::unique_ptr<::arm_compute::IFunction> &&layer)
+{
+  using ::neurun::kernel::acl_cl::CLFunction; // local shorthand for the wrapper type
+  return std::unique_ptr<CLFunction>{new CLFunction(std::move(layer))};
+}
+
+::arm_compute::PadStrideInfo asPadStringInfo(const neurun::util::Padding &padding,
+                                             const neurun::util::Stride &stride)
 {
   return ::arm_compute::PadStrideInfo{stride.horizontal,
                                       stride.vertical,
@@ -86,7 +95,9 @@ void ActivationBuilder::appendReLU(::arm_compute::ICLTensor *ifm_alloc)
 
   fn->configure(ifm_alloc, nullptr, act_info);
 
-  _builder.append(std::move(fn));
+  auto acl_fn = make_cl_function(std::move(fn));
+
+  _builder.append(std::move(acl_fn));
 }
 
 void ActivationBuilder::append(FuseCode code, ::arm_compute::ICLTensor *ifm_alloc)
@@ -113,25 +124,27 @@ void ActivationBuilder::append(FuseCode code, ::arm_compute::ICLTensor *ifm_allo
 //
 // StageGenerator
 //
-StageGenerator::StageGenerator(const neurun::graph::operand::Set &ctx,
+StageGenerator::StageGenerator(const neurun::model::operand::Set &ctx, // kept in _ctx
                                const std::shared_ptr<TensorBuilder> &tensor_builder)
     : _ctx(ctx), _tensor_builder(tensor_builder)
 {
   // DO NOTHING
 }
 
-Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &node)
+void StageGenerator::visit(const model::operation::Conv2DNode &node)
 {
-  const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)};
-  const ::neurun::graph::operand::Index ker_index{node.getInputs().at(1)};
-  const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)};
+  using model::operation::Conv2DNode;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)};
 
-  const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index};
-  const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index};
+  const auto vstride_index{node.param().vstride_index};
+  const auto hstride_index{node.param().hstride_index};
 
-  const ::neurun::graph::operand::Index padding_index{node.param().padding_index};
-  const ::neurun::graph::operand::Index activation_index{node.param().activation_index};
+  const auto padding_index{node.param().padding_index};
+  const auto activation_index{node.param().activation_index};
 
   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
@@ -143,7 +156,7 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n
   assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
          (ANEURALNETWORKS_PADDING_VALID == padding_type));
 
-  ::internal::Stride stride;
+  neurun::util::Stride stride;
 
   stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
   stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
@@ -151,64 +164,67 @@ Stage StageGenerator::generate(const graph::operation::Conv2D::Implicit::Node &n
   // Construct operation parameters
   struct Param
   {
-    int ofm_index;
-    int ifm_index;
-    int ker_index;
-    int bias_index;
+    model::operand::Index ofm_index;
+    model::operand::Index ifm_index;
+    model::operand::Index ker_index;
+    model::operand::Index bias_index;
 
-    ::internal::Padding padding;
-    ::internal::Stride stride;
+    neurun::util::Padding padding;
+    neurun::util::Stride stride;
 
     FuseCode activation;
   };
 
   Param param;
 
-  param.ofm_index = ofm_index.asInt();
-  param.ifm_index = ifm_index.asInt();
-  param.ker_index = ker_index.asInt();
-  param.bias_index = bias_index.asInt();
+  param.ofm_index = ofm_index;
+  param.ifm_index = ifm_index;
+  param.ker_index = ker_index;
+  param.bias_index = bias_index;
 
   param.stride = stride;
   param.padding =
       (padding_type == ANEURALNETWORKS_PADDING_SAME)
-          ? ::internal::same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
-          : ::internal::valid_padding();
+          ? neurun::util::same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
+          : neurun::util::valid_padding();
 
   param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get();
-    auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get();
-    auto ker_alloc = tensors->at(::neurun::graph::operand::Index{param.ker_index}).get();
-    auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) {
+    auto ofm_alloc = tensors->at(param.ofm_index).get();
+    auto ifm_alloc = tensors->at(param.ifm_index).get();
+    auto ker_alloc = tensors->at(param.ker_index).get();
+    auto bias_alloc = tensors->at(param.bias_index).get();
 
     const auto conv_info = asPadStringInfo(param.padding, param.stride);
 
     std::unique_ptr<::arm_compute::CLConvolutionLayer> fn{new ::arm_compute::CLConvolutionLayer};
 
-    fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info);
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info);
 
-    builder.append(std::move(fn));
+    auto acl_fn = make_cl_function(std::move(fn));
 
-    ActivationBuilder{builder}.append(param.activation, ofm_alloc);
-  };
+    builder.append(std::move(acl_fn));
+
+    ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle());
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node &node)
+void StageGenerator::visit(const model::operation::MaxPool2DNode &node)
 {
-  const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)};
 
-  const ::neurun::graph::operand::Index kh_index{node.param().kh_index};
-  const ::neurun::graph::operand::Index kw_index{node.param().kw_index};
+  const auto kh_index{node.param().kh_index};
+  const auto kw_index{node.param().kw_index};
 
-  const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index};
-  const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index};
+  const auto vstride_index{node.param().vstride_index};
+  const auto hstride_index{node.param().hstride_index};
 
-  const ::neurun::graph::operand::Index padding_index{node.param().padding_index};
+  const auto padding_index{node.param().padding_index};
 
   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
@@ -225,22 +241,22 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node
   // Construct operation parameters
   struct Param
   {
-    int ofm_index;
-    int ifm_index;
+    model::operand::Index ofm_index;
+    model::operand::Index ifm_index;
 
     uint32_t kw;
     uint32_t kh;
 
-    ::internal::Padding padding;
-    ::internal::Stride stride;
+    neurun::util::Padding padding;
+    neurun::util::Stride stride;
 
     // TODO Add 'activation' field
   };
 
   Param param;
 
-  param.ofm_index = ofm_index.asInt();
-  param.ifm_index = ifm_index.asInt();
+  param.ofm_index = ofm_index;
+  param.ifm_index = ifm_index;
 
   param.kh = kh;
   param.kw = kw;
@@ -249,8 +265,8 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node
   param.stride.horizontal = hstride;
 
   param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
-                      ? ::internal::same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
-                      : ::internal::valid_padding();
+                      ? neurun::util::same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+                      : neurun::util::valid_padding();
 
   VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
   VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
@@ -267,9 +283,9 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get();
-    auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) {
+    auto ofm_alloc = tensors->at(param.ofm_index).get();
+    auto ifm_alloc = tensors->at(param.ifm_index).get();
 
     ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
                                          ::arm_compute::Size2D{param.kw, param.kh},
@@ -277,24 +293,26 @@ Stage StageGenerator::generate(const graph::operation::MaxPool2D::Implicit::Node
 
     std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
 
-    fn->configure(ifm_alloc, ofm_alloc, info);
+    fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
 
-    builder.append(std::move(fn));
-  };
+    auto acl_fn = make_cl_function(std::move(fn));
+
+    builder.append((std::move(acl_fn)));
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node &node)
+void StageGenerator::visit(const model::operation::AvgPool2DNode &node)
 {
-  const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index ifm_index{node.getInputs().at(0)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)};
 
-  const ::neurun::graph::operand::Index kh_index{node.param().kh_index};
-  const ::neurun::graph::operand::Index kw_index{node.param().kw_index};
+  const auto kh_index{node.param().kh_index};
+  const auto kw_index{node.param().kw_index};
 
-  const ::neurun::graph::operand::Index vstride_index{node.param().vstride_index};
-  const ::neurun::graph::operand::Index hstride_index{node.param().hstride_index};
+  const auto vstride_index{node.param().vstride_index};
+  const auto hstride_index{node.param().hstride_index};
 
-  const ::neurun::graph::operand::Index padding_index{node.param().padding_index};
+  const auto padding_index{node.param().padding_index};
 
   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
@@ -314,22 +332,22 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node
   // Construct operation parameters
   struct Param
   {
-    int ofm_index;
-    int ifm_index;
+    model::operand::Index ofm_index;
+    model::operand::Index ifm_index;
 
     uint32_t kw;
     uint32_t kh;
 
-    ::internal::Padding padding;
-    ::internal::Stride stride;
+    neurun::util::Padding padding;
+    neurun::util::Stride stride;
 
     // TODO Add 'activation' field
   };
 
   Param param;
 
-  param.ofm_index = ofm_index.asInt();
-  param.ifm_index = ifm_index.asInt();
+  param.ofm_index = ofm_index;
+  param.ifm_index = ifm_index;
 
   param.kh = kh;
   param.kw = kw;
@@ -338,8 +356,8 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node
   param.stride.horizontal = hstride;
 
   param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
-                      ? ::internal::same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
-                      : ::internal::valid_padding();
+                      ? neurun::util::same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+                      : neurun::util::valid_padding();
 
   VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
   VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
@@ -349,7 +367,7 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node
   VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
   VERBOSE(AvgPool2D) << "STRIDE_H: " << vstride << std::endl;
   VERBOSE(AvgPool2D) << "STRIDE_W: " << hstride << std::endl;
-  VERBOSE(AvgPool2D) << "PAD: " << ::nnfw::support::nnapi::to_string(padding_type) << std::endl;
+  VERBOSE(AvgPool2D) << "PAD: " << neurun::util::to_string(padding_type) << std::endl;
   VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl;
   VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
   VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl;
@@ -357,9 +375,9 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(::neurun::graph::operand::Index{param.ofm_index}).get();
-    auto ifm_alloc = tensors->at(::neurun::graph::operand::Index{param.ifm_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) {
+    auto ofm_alloc = tensors->at(param.ofm_index).get();
+    auto ifm_alloc = tensors->at(param.ifm_index).get();
 
     ::arm_compute::PoolingLayerInfo info{
         ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{param.kw, param.kh},
@@ -367,170 +385,207 @@ Stage StageGenerator::generate(const graph::operation::AvgPool2D::Implicit::Node
 
     std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
 
-    fn->configure(ifm_alloc, ofm_alloc, info);
+    fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
 
-    builder.append(std::move(fn));
-  };
+    auto acl_fn = make_cl_function(std::move(fn));
+
+    builder.append((std::move(acl_fn)));
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::Concat::Node &node)
+void StageGenerator::visit(const model::operation::ConcatNode &node) // builds the Concat stage
 {
-  const ::neurun::graph::operand::Index ofm_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index axis_index{node.param().axis_index};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto axis_index{node.param().axis_index};
 
   struct Param
   {
-    int32_t output_index;
-    std::vector<int32_t> input_indexes;
+    model::operand::Index output_index;
+    std::vector<model::operand::Index> input_indexes;
 
     int32_t axis;
   };
 
   Param param;
 
-  param.output_index = ofm_index.asInt();
+  param.output_index = ofm_index;
   for (const auto &e : node.getInputs())
   {
-    param.input_indexes.emplace_back(e.asInt());
+    param.input_indexes.emplace_back(e);
   }
   param.axis = _ctx.at(axis_index).asScalar<int32_t>();
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) { // captures tensors and param by value
+    // The concat is skipped when isSubTensorOf(output, input) holds for every input
+    bool canEliminate = true;
+    for (auto ifm_ind : param.input_indexes)
+    {
+      if (!tensors->isSubTensorOf(param.output_index, ifm_ind))
+      {
+        canEliminate = false;
+        break;
+      }
+    }
+    if (canEliminate)
+    {
+      // Concat eliminated: return without appending any function to the builder
+      return;
+    }
+
+    auto output_alloc = tensors->at(param.output_index).get();
 
-    std::vector<::arm_compute::ICLTensor *> input_allocs;
+    std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> input_allocs;
     for (auto ifm_ind : param.input_indexes)
     {
-      input_allocs.emplace_back(tensors->at(::neurun::graph::operand::Index{ifm_ind}).get());
+      input_allocs.emplace_back( // NOTE(review): cast result is not null-checked - confirm
+          dynamic_cast<::neurun::backend::acl_cl::operand::CLTensor *>(tensors->at(ifm_ind).get()));
     }
 
     std::unique_ptr<::neurun::kernel::acl_cl::ConcatLayer> fn{
         new ::neurun::kernel::acl_cl::ConcatLayer};
 
-    fn->configure(input_allocs, param.axis, output_alloc);
+    fn->configure(input_allocs, param.axis, // NOTE(review): output cast is also unchecked
+                  dynamic_cast<::neurun::backend::acl_cl::operand::CLTensor *>(output_alloc));
 
-    builder.append(std::move(fn));
-  };
+    auto acl_fn = make_cl_function(std::move(fn)); // wrap the layer before appending it
+
+    builder.append(std::move(acl_fn));
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::FullyConnected::Node &node)
+void StageGenerator::visit(const model::operation::FullyConnectedNode &node) // builds the FC stage
 {
-  const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)};
-  const ::neurun::graph::operand::Index weight_index{node.getInputs().at(1)};
-  const ::neurun::graph::operand::Index bias_index{node.getInputs().at(2)};
-  const ::neurun::graph::operand::Index activation_index{node.param().activation_index};
+  using model::operation::FullyConnectedNode; // shortens the Input::* enumerators below
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)};
+  const auto activation_index{node.param().activation_index};
 
   // Construct operation parameters
   struct Param
   {
-    int output_index;
+    model::operand::Index output_index;
 
-    int input_index;
-    int weight_index;
-    int bias_index;
+    model::operand::Index input_index;
+    model::operand::Index weight_index;
+    model::operand::Index bias_index;
 
     FuseCode activation;
   };
 
   Param param;
 
-  param.output_index = output_index.asInt();
-  param.input_index = input_index.asInt();
-  param.weight_index = weight_index.asInt();
-  param.bias_index = bias_index.asInt();
+  param.output_index = output_index;
+  param.input_index = input_index;
+  param.weight_index = weight_index;
+  param.bias_index = bias_index;
 
   param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get();
-    auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get();
-    auto weight_alloc = tensors->at(::neurun::graph::operand::Index{param.weight_index}).get();
-    auto bias_alloc = tensors->at(::neurun::graph::operand::Index{param.bias_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) { // captures tensors and param by value
+    auto output_alloc = tensors->at(param.output_index).get();
+    auto input_alloc = tensors->at(param.input_index).get();
+    auto weight_alloc = tensors->at(param.weight_index).get();
+    auto bias_alloc = tensors->at(param.bias_index).get();
 
     auto fn = make_layer<::arm_compute::CLFullyConnectedLayer>();
 
-    fn->configure(input_alloc, weight_alloc, bias_alloc, output_alloc);
+    fn->configure(input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(),
+                  output_alloc->handle()); // the ACL layer is configured on handle() results
 
-    builder.append(std::move(fn));
+    auto acl_fn = make_cl_function(std::move(fn));
 
-    ActivationBuilder{builder}.append(param.activation, output_alloc);
-  };
+    builder.append(std::move(acl_fn));
+
+    ActivationBuilder{builder}.append(param.activation, output_alloc->handle()); // fused activation
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::Reshape::Node &node)
+void StageGenerator::visit(const model::operation::ReshapeNode &node) // builds the Reshape stage
 {
-  const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)};
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)};
 
   struct Param
   {
-    int output_index;
-    int input_index;
+    model::operand::Index output_index;
+    model::operand::Index input_index;
   };
 
   Param param;
 
-  param.output_index = output_index.asInt();
-  param.input_index = input_index.asInt();
+  param.output_index = output_index;
+  param.input_index = input_index;
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get();
-    auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) { // captures tensors and param by value
+    auto output_alloc = tensors->at(param.output_index).get();
+    auto input_alloc = tensors->at(param.input_index).get();
 
     auto fn = make_layer<::arm_compute::CLReshapeLayer>();
 
-    fn->configure(input_alloc, output_alloc);
+    fn->configure(input_alloc->handle(), output_alloc->handle()); // configured on handle() results
 
-    builder.append(std::move(fn));
-  };
+    auto acl_fn = make_cl_function(std::move(fn)); // wrap the layer before appending it
+
+    builder.append(std::move(acl_fn));
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::Softmax::Node &node)
+void StageGenerator::visit(const model::operation::SoftmaxNode &node) // builds the Softmax stage
 {
-  const ::neurun::graph::operand::Index output_index{node.getOutputs().at(0)};
-  const ::neurun::graph::operand::Index input_index{node.getInputs().at(0)};
-  const ::neurun::graph::operand::Index scale_index{node.param().scale_index};
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)};
+  const auto scale_index{node.param().scale_index}; // must name a rank-0 operand (asserted below)
 
   assert(_ctx.at(scale_index).shape().rank() == 0);
 
   struct Param
   {
-    int output_index;
-    int input_index;
+    model::operand::Index output_index;
+    model::operand::Index input_index;
     float scale;
   };
 
   Param param;
 
-  param.output_index = output_index.asInt();
-  param.input_index = input_index.asInt();
+  param.output_index = output_index;
+  param.input_index = input_index;
   param.scale = _ctx.at(scale_index).asScalar<float>();
 
   auto tensors = _tensor_builder;
 
-  return [tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(::neurun::graph::operand::Index{param.output_index}).get();
-    auto input_alloc = tensors->at(::neurun::graph::operand::Index{param.input_index}).get();
+  returnStage([tensors, param](IExecutionBuilder &builder) { // captures tensors and param by value
+    auto output_alloc = tensors->at(param.output_index).get();
+    auto input_alloc = tensors->at(param.input_index).get();
 
     auto fn = make_layer<::arm_compute::CLSoftmaxLayer>();
 
-    fn->configure(input_alloc, output_alloc, param.scale);
+    fn->configure(input_alloc->handle(), output_alloc->handle(), param.scale); // scale read above
 
-    builder.append(std::move(fn));
-  };
+    auto acl_fn = make_cl_function(std::move(fn)); // wrap the layer before appending it
+
+    builder.append(std::move(acl_fn));
+  });
 }
 
-Stage StageGenerator::generate(const graph::operation::NOP::Node & /* node */)
+void StageGenerator::visit(const model::operation::PermuteNode & /* node */) // always throws
 {
-  // DO NOTHING
-  return nullptr;
+  throw std::runtime_error("Unsupported"); // std::exception-derived, like the AddNode visitor
+}
+
+void StageGenerator::visit(const model::operation::AddNode &) // not implemented: always throws
+{
+  VERBOSE(Add) << "generate CPU Add" << std::endl; // NOTE(review): "CPU" in acl_cl code - confirm
+
+  throw std::runtime_error("NYI");
 }
 
 } // namespace acl_cl