/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "StageGenerator.h"

#include <stdexcept> // std::runtime_error, thrown by the "NYI" visit below

#include "cpp14/memory.h"
#include "util/Padding.h"
#include "kernel/cpu/OperationUtils.h"
#include "kernel/cpu/ConvolutionLayer.h"
#include "kernel/cpu/AvgPoolLayer.h"
#include "kernel/cpu/MaxPoolLayer.h"
#include "kernel/cpu/ConcatLayer.h"
#include "kernel/cpu/FullyConnectedLayer.h"
#include "kernel/cpu/ReshapeLayer.h"
#include "kernel/cpu/SoftMaxLayer.h"
#include "kernel/cpu/PermuteLayer.h"
#include "backend/BackendManager.h"
#include "backend/interface/IConfig.h"
#include "util/logging.h"
#include "util/Utils.h"

namespace neurun
{
namespace backend
{
namespace cpu
{

StageGenerator::StageGenerator(const neurun::model::operand::Set &operand_ctx,
                               const std::shared_ptr<TensorBuilder> &tensor_builder)
    : _ctx(operand_ctx), _tensor_builder(tensor_builder)
{
  // DO NOTHING
}

void StageGenerator::visit(const model::operation::Conv2DNode &node)
{
  using model::operation::Conv2DNode;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)};

  const auto vstride_index{node.param().vstride_index};
  const auto hstride_index{node.param().hstride_index};

  const auto padding_index{node.param().padding_index};
  const auto activation_index{node.param().activation_index};

  const PaddingCode padding_type =
      static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());

  assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
         (ANEURALNETWORKS_PADDING_VALID == padding_type));

  util::Stride stride;

  stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
  stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();

  // Construct operation parameters
  struct Param
  {
    model::operand::Index ofm_index;
    model::operand::Index ifm_index;
    model::operand::Index ker_index;
    model::operand::Index bias_index;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;
    ::neurun::kernel::cpu::Shape ker_shape;
    ::neurun::kernel::cpu::Shape bias_shape;

    util::Padding padding;
    util::Stride stride;

    FuseCode activation;
  };

  Param param;

  param.ofm_index = ofm_index;
  param.ifm_index = ifm_index;
  param.ker_index = ker_index;
  param.bias_index = bias_index;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ifm_index));
  param.ker_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ker_index));
  param.bias_shape = ::neurun::kernel::cpu::getShape(_ctx.at(bias_index));

  param.stride = stride;
  param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
                      ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(),
                                           _ctx.at(ofm_index).shape().asFeature(), stride,
                                           _ctx.at(ker_index).shape().asKernel().W,
                                           _ctx.at(ker_index).shape().asKernel().H)
                      : util::valid_padding();

  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto ofm_alloc = tensors->at(param.ofm_index);
    auto ifm_alloc = tensors->at(param.ifm_index);
    auto ker_alloc = tensors->at(param.ker_index);
    auto bias_alloc = tensors->at(param.bias_index);

    std::unique_ptr<::neurun::kernel::cpu::ConvolutionLayer> fn{
        new ::neurun::kernel::cpu::ConvolutionLayer};

    fn->configure(ifm_alloc->buffer(), param.ifm_shape, ker_alloc->buffer(), param.ker_shape,
                  bias_alloc->buffer(), param.bias_shape, param.padding.left, param.padding.right,
                  param.padding.top, param.padding.bottom, param.stride.horizontal,
                  param.stride.vertical, param.activation, ofm_alloc->buffer(), param.ofm_shape);

    builder.append(std::move(fn));
  });
}
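
// Note: util::same_padding() computes explicit pad amounts such that the
// output spatial size equals ceil(input / stride), matching the NNAPI
// SAME-padding rule, while util::valid_padding() returns all-zero padding.
// A rough per-axis sketch of the arithmetic (illustrative only; the actual
// helper lives in util/Padding.h):
//
//   const int32_t pad = std::max(0, (ofm_h - 1) * stride.vertical + ker_h - ifm_h);
//   padding.top = pad / 2;
//   padding.bottom = pad - padding.top;
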
void StageGenerator::visit(const model::operation::MaxPool2DNode &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)};

  const auto kh_index{node.param().kh_index};
  const auto kw_index{node.param().kw_index};

  const auto vstride_index{node.param().vstride_index};
  const auto hstride_index{node.param().hstride_index};

  const auto padding_index{node.param().padding_index};
  const auto activation_index{node.param().activation_index};

  const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
  const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();

  const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
  const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();

  const PaddingCode padding_type =
      static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());

  assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
         (ANEURALNETWORKS_PADDING_VALID == padding_type));

  // Construct operation parameters
  struct Param
  {
    model::operand::Index ofm_index;
    model::operand::Index ifm_index;

    uint32_t kw;
    uint32_t kh;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;

    util::Padding padding;
    util::Stride stride;

    FuseCode activation;
  };

  Param param;

  param.ofm_index = ofm_index;
  param.ifm_index = ifm_index;

  param.kh = kh;
  param.kw = kw;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ifm_index));

  param.stride.vertical = vstride;
  param.stride.horizontal = hstride;

  param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
                      ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(),
                                           _ctx.at(ofm_index).shape().asFeature(), param.stride,
                                           kw, kh)
                      : util::valid_padding();

  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto ofm_alloc = tensors->at(param.ofm_index).get();
    auto ifm_alloc = tensors->at(param.ifm_index).get();

    std::unique_ptr<::neurun::kernel::cpu::MaxPoolLayer> fn{
        new ::neurun::kernel::cpu::MaxPoolLayer};

    fn->configure(ifm_alloc->buffer(), param.ifm_shape, param.padding.left, param.padding.right,
                  param.padding.top, param.padding.bottom, param.stride.horizontal,
                  param.stride.vertical, param.kw, param.kh, param.activation, ofm_alloc->buffer(),
                  param.ofm_shape);

    builder.append(std::move(fn));
  });
}
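
// Every visit() in this file follows the deferred pattern seen above: operand
// indices, shapes, and hyper-parameters are copied into a plain Param value,
// which a lambda captures by value together with the tensor builder. The
// kernel is only constructed and configured when the stage runs, by which
// time the tensor buffers have been allocated. In outline (make_kernel is a
// hypothetical placeholder, not part of this codebase):
//
//   Param param = /* indices, shapes, hyper-parameters */;
//   auto tensors = _tensor_builder;
//   returnStage([tensors, param](IExecutionBuilder &builder) {
//     auto fn = make_kernel(tensors, param); // buffers are valid here
//     builder.append(std::move(fn));
//   });
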
void StageGenerator::visit(const model::operation::AvgPool2DNode &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)};

  const auto kh_index{node.param().kh_index};
  const auto kw_index{node.param().kw_index};

  const auto vstride_index{node.param().vstride_index};
  const auto hstride_index{node.param().hstride_index};

  const auto padding_index{node.param().padding_index};
  const auto activation_index{node.param().activation_index};

  const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
  const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();

  const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
  const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();

  const PaddingCode padding_type =
      static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());

  assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
         (ANEURALNETWORKS_PADDING_VALID == padding_type));

  // Construct operation parameters
  struct Param
  {
    model::operand::Index ofm_index;
    model::operand::Index ifm_index;

    uint32_t kw;
    uint32_t kh;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;

    util::Padding padding;
    util::Stride stride;

    FuseCode activation;
  };

  Param param;

  param.ofm_index = ofm_index;
  param.ifm_index = ifm_index;

  param.kh = kh;
  param.kw = kw;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ifm_index));

  param.stride.vertical = vstride;
  param.stride.horizontal = hstride;

  param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
                      ? util::same_padding(_ctx.at(ifm_index).shape().asFeature(),
                                           _ctx.at(ofm_index).shape().asFeature(), param.stride,
                                           kw, kh)
                      : util::valid_padding();

  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto ofm_alloc = tensors->at(param.ofm_index).get();
    auto ifm_alloc = tensors->at(param.ifm_index).get();

    std::unique_ptr<::neurun::kernel::cpu::AvgPoolLayer> fn{
        new ::neurun::kernel::cpu::AvgPoolLayer};

    fn->configure(ifm_alloc->buffer(), param.ifm_shape, param.padding.left, param.padding.right,
                  param.padding.top, param.padding.bottom, param.stride.horizontal,
                  param.stride.vertical, param.kw, param.kh, param.activation, ofm_alloc->buffer(),
                  param.ofm_shape);

    builder.append(std::move(fn));
  });
}
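
// Concatenation joins its inputs along a single axis; every other dimension
// must match across inputs. For example, concatenating two NHWC tensors of
// shape [1, 4, 4, 8] along axis 3 yields an output of shape [1, 4, 4, 16].
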
void StageGenerator::visit(const model::operation::ConcatNode &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto axis_index{node.param().axis_index};

  struct Param
  {
    model::operand::Index output_index;
    std::vector<model::operand::Index> input_indexes;

    int32_t axis;

    ::neurun::kernel::cpu::Shape ofm_shape;
    std::vector<::neurun::kernel::cpu::Shape> ifm_shapes;
  };

  Param param;

  param.output_index = ofm_index;
  for (const auto &e : node.getInputs())
  {
    param.input_indexes.emplace_back(e);
  }
  param.axis = _ctx.at(axis_index).asScalar<int32_t>();

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(ofm_index));

  for (auto e : node.getInputs())
  {
    param.ifm_shapes.emplace_back(::neurun::kernel::cpu::getShape(_ctx.at(e)));
  }

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto output_alloc = tensors->at(param.output_index).get();

    std::vector<const uint8_t *> input_buffers;
    for (auto ifm_ind : param.input_indexes)
    {
      input_buffers.emplace_back(tensors->at(ifm_ind).get()->buffer());
    }

    std::unique_ptr<::neurun::kernel::cpu::ConcatLayer> fn{new ::neurun::kernel::cpu::ConcatLayer};

    fn->configure(input_buffers, param.ifm_shapes, param.axis, output_alloc->buffer(),
                  param.ofm_shape);

    builder.append(std::move(fn));
  });
}
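
// A fully connected layer computes
//   output = activation(input x transpose(weights) + bias)
// where, following NNAPI conventions, weights have shape
// [num_units, input_size], bias has shape [num_units], and the input is
// flattened to [batch_size, input_size] beforehand.
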
void StageGenerator::visit(const model::operation::FullyConnectedNode &node)
{
  using model::operation::FullyConnectedNode;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)};
  const auto activation_index{node.param().activation_index};

  // Construct operation parameters
  struct Param
  {
    model::operand::Index output_index;
    model::operand::Index input_index;
    model::operand::Index weight_index;
    model::operand::Index bias_index;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;
    ::neurun::kernel::cpu::Shape weight_shape;
    ::neurun::kernel::cpu::Shape bias_shape;

    FuseCode activation;
  };

  Param param;

  param.output_index = output_index;
  param.input_index = input_index;
  param.weight_index = weight_index;
  param.bias_index = bias_index;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index));
  param.weight_shape = ::neurun::kernel::cpu::getShape(_ctx.at(weight_index));
  param.bias_shape = ::neurun::kernel::cpu::getShape(_ctx.at(bias_index));

  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto output_alloc = tensors->at(param.output_index).get();
    auto input_alloc = tensors->at(param.input_index).get();
    auto weight_alloc = tensors->at(param.weight_index).get();
    auto bias_alloc = tensors->at(param.bias_index).get();

    std::unique_ptr<::neurun::kernel::cpu::FullyConnectedLayer> fn{
        new ::neurun::kernel::cpu::FullyConnectedLayer};

    fn->configure(input_alloc->buffer(), param.ifm_shape, weight_alloc->buffer(),
                  param.weight_shape, bias_alloc->buffer(), param.bias_shape, param.activation,
                  output_alloc->buffer(), param.ofm_shape);

    builder.append(std::move(fn));
  });
}

void StageGenerator::visit(const model::operation::ReshapeNode &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)};

  struct Param
  {
    model::operand::Index output_index;
    model::operand::Index input_index;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;
  };

  Param param;

  param.output_index = output_index;
  param.input_index = input_index;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index));

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto output_alloc = tensors->at(param.output_index).get();
    auto input_alloc = tensors->at(param.input_index).get();

    std::unique_ptr<::neurun::kernel::cpu::ReshapeLayer> fn{
        new ::neurun::kernel::cpu::ReshapeLayer};

    fn->configure(input_alloc->buffer(), param.ifm_shape, output_alloc->buffer(), param.ofm_shape);

    builder.append(std::move(fn));
  });
}

void StageGenerator::visit(const model::operation::SoftmaxNode &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)};
  const auto scale_index{node.param().scale_index};

  struct Param
  {
    model::operand::Index output_index;
    model::operand::Index input_index;

    ::neurun::kernel::cpu::Shape ofm_shape;
    ::neurun::kernel::cpu::Shape ifm_shape;

    float scale;
  };

  Param param;

  param.output_index = output_index;
  param.input_index = input_index;

  param.ofm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(output_index));
  param.ifm_shape = ::neurun::kernel::cpu::getShape(_ctx.at(input_index));

  // `scale` plays the role of the softmax beta parameter:
  // output[i] = exp(beta * input[i]) / sum_j(exp(beta * input[j]))
  param.scale = _ctx.at(scale_index).asScalar<float>();

  auto tensors = _tensor_builder;

  returnStage([tensors, param](IExecutionBuilder &builder) {
    auto output_alloc = tensors->at(param.output_index).get();
    auto input_alloc = tensors->at(param.input_index).get();

    std::unique_ptr<::neurun::kernel::cpu::SoftMaxLayer> fn{
        new ::neurun::kernel::cpu::SoftMaxLayer};

    fn->configure(input_alloc->buffer(), param.ifm_shape, param.scale, output_alloc->buffer(),
                  param.ofm_shape);

    builder.append(std::move(fn));
  });
}
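
// Permute is the one stage whose input and output may belong to different
// backends, so rather than capturing raw buffers it resolves each operand's
// defining backend and wraps the tensors via that backend's tensor builder.
// PermuteLayer then copies between the two tensor objects, either verbatim
// (PermuteType::COPY) or with a layout change (presumably an NHWC <-> NCHW
// reordering; an assumption based on the type names, not shown in this file).
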
void StageGenerator::visit(const model::operation::PermuteNode &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  using PermuteType = model::operation::PermuteNode::Type;

  struct Param
  {
    model::operand::Index output_index;
    model::operand::Index input_index;

    model::operand::Shape shape;

    PermuteType type{PermuteType::COPY};
  };

  Param param;

  param.output_index = output_index;
  param.input_index = input_index;

  param.shape = _ctx.at(output_index).shape();
  param.type = node.param().type;

  // assert(param.shape == _ctx.at(input_index));

  const auto &input_li = _ctx.at(input_index).lower_info();
  const auto &output_li = _ctx.at(output_index).lower_info();
  const auto input_backend = input_li->def_backends().getOnlyElement();
  const auto output_backend = output_li->def_backends().getOnlyElement();

  const auto input_tensors = input_backend->tensor_builder();
  const auto output_tensors = output_backend->tensor_builder();

  returnStage([input_tensors, output_tensors, param](IExecutionBuilder &builder) {
    auto output_object = output_tensors->wrapTensor(param.output_index);
    auto input_object = input_tensors->wrapTensor(param.input_index);

    auto fn = nnfw::cpp14::make_unique<::neurun::kernel::cpu::PermuteLayer>();

    fn->configure(input_object, output_object, param.shape, param.type);

    builder.append(std::move(fn));
  });
}

void StageGenerator::visit(const model::operation::AddNode &) { throw std::runtime_error("NYI"); }

} // namespace cpu
} // namespace backend
} // namespace neurun