diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-03-04 18:09:24 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-03-04 18:09:24 +0900 |
commit | 302e6564a7a76109e1178207e44e45a58631c477 (patch) | |
tree | 6cc4bd95e5e438331fc2c53234af4ed0e0f3bc20 /compute/cker/include/cker/operation | |
parent | bd11b24234d7d43dfe05a81c520aa01ffad06e42 (diff) | |
download | nnfw-302e6564a7a76109e1178207e44e45a58631c477.tar.gz nnfw-302e6564a7a76109e1178207e44e45a58631c477.tar.bz2 nnfw-302e6564a7a76109e1178207e44e45a58631c477.zip |
Imported Upstream version 1.1.0 (tags: upstream/1.1.0, submit/tizen/20200304.094649, submit/tizen/20200304.093946, submit/tizen/20200304.092919, accepted/tizen/unified/20200305.051107)
Diffstat (limited to 'compute/cker/include/cker/operation')
19 files changed, 2257 insertions, 0 deletions
diff --git a/compute/cker/include/cker/operation/AveragePool.h b/compute/cker/include/cker/operation/AveragePool.h new file mode 100644 index 000000000..b20919429 --- /dev/null +++ b/compute/cker/include/cker/operation/AveragePool.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_AVERAGE_POOL_H__ +#define __NNFW_CKER_AVERAGE_POOL_H__ + +#if defined(CKER_OPTIMIZED_EIGEN) +#include "cker/operation/optimized/AveragePool.h" +#endif // defined(CKER_OPTIMIZED_EIGEN) + +#include "cker/operation/reference/AveragePool.h" + +namespace nnfw +{ +namespace cker +{ + +inline void AveragePool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ +#if defined(CKER_OPTIMIZED_EIGEN) + optimized::AveragePool(params, input_shape, input_data, output_shape, output_data); +#else // defined(CKER_OPTIMIZED_EIGEN) + reference::AveragePool(params, input_shape, input_data, output_shape, output_data); +#endif // defined(CKER_OPTIMIZED_EIGEN) +} + +inline void AveragePool(const PoolParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data) +{ + assert(params.quantized_activation_min <= params.quantized_activation_max); + assert(input_shape.DimensionsCount() == 4); + 
assert(output_shape.DimensionsCount() == 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + const int in_x_origin = (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); + int filter_count = (filter_y_end - filter_y_start) * (filter_x_end - filter_x_start); + if (filter_count <= 0) + { + continue; + } + for (int channel = 0; channel < depth; ++channel) + { + int32_t acc = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + acc += input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + } + } + acc = (acc + filter_count / 2) / filter_count; + acc = std::max(acc, params.quantized_activation_min); + acc = std::min(acc, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast<uint8_t>(acc); + 
} + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_AVERAGE_POOL_H__ diff --git a/compute/cker/include/cker/operation/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/BinaryArithmeticOps.h new file mode 100644 index 000000000..60dd02651 --- /dev/null +++ b/compute/cker/include/cker/operation/BinaryArithmeticOps.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__ +#define __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__ + +#include <functional> +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct BinaryArithmeticOpParam +{ + // Shape dependent / common to data / op types. + // BroadcastableOpCategory broadcast_category; + // uint8 inference params. + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + int32_t output_multiplier; + int32_t output_shift; + // Add / Sub, not Mul, uint8 inference params. + int32_t left_shift; + int32_t input1_multiplier; + int32_t input1_shift; + int32_t input2_multiplier; + int32_t input2_shift; + // uint8, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. 
+ float float_activation_min; + float float_activation_max; + + // Processed output dimensions. + // Let input "a" be the one that broadcasts in the faster-changing dimension. + // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and + // {b0, b1, b2, b3, b4}, + // broadcast_shape[4] = b0 = a0. + // broadcast_shape[3] = b1; a1 = 1. + // broadcast_shape[2] = b2 = a2. + // broadcast_shape[1] = a3; b3 = 1. + // broadcast_shape[0] = b4 = a4. + // int broadcast_shape[5]; +}; + +template <typename T> +inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, + const T *input1_data, const Shape &input2_shape, + const T *input2_data, const Shape &output_shape, T *output_data, + const std::function<T(const T &, const T &)> &fn) +{ + const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]), + params.quantized_activation_min, + params.quantized_activation_max); + } +} + +template <> +inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, + const float *input1_data, const Shape &input2_shape, + const float *input2_data, const Shape &output_shape, + float *output_data, + const std::function<float(const float &, const float &)> &fn) +{ + const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < size; i++) + { + output_data[i] = + ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]), + params.float_activation_min, params.float_activation_max); + } +} + +template <typename T> +inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam ¶ms, + const Shape &input1_shape, const T *input1_data, + const Shape &input2_shape, const T *input2_data, + const Shape &output_shape, T *output_data, + const std::function<T(const T &, const T &)> &fn) +{ + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; 
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); + const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape); + + // Comment from tensorflow lite: + // + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax( + fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]), + params.quantized_activation_min, params.quantized_activation_max); + } + } + } + } +} + +template <> +inline void BroadcastBinaryArithmeticOpSlow( + const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data, + const Shape &input2_shape, const float *input2_data, const Shape &output_shape, + float *output_data, const std::function<float(const float &, const float &)> &fn) +{ + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2); + const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape); + + for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < 
extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax( + fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]), + params.float_activation_min, params.float_activation_max); + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_BINARY_ARITHMETIC_OPS_H__ diff --git a/compute/cker/include/cker/operation/Concatenation.h b/compute/cker/include/cker/operation/Concatenation.h new file mode 100644 index 000000000..69a179c8c --- /dev/null +++ b/compute/cker/include/cker/operation/Concatenation.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_CONCATENATION_H__ +#define __NNFW_CKER_CONCATENATION_H__ + +#include <cstdint> + +#include "cker/Shape.h" + +namespace nnfw +{ +namespace cker +{ + +struct ConcatenationParams +{ + int8_t axis; + const int32_t *input_zeropoint; + const float *input_scale; + uint16_t inputs_count; + int32_t output_zeropoint; + float output_scale; +}; + +template <typename Scalar> +inline void Concatenation(const ConcatenationParams &params, const Shape *const *input_shapes, + const Scalar *const *input_data, const Shape &output_shape, + Scalar *output_data) +{ + int axis = params.axis; + int inputs_count = params.inputs_count; + const int concat_dimensions = output_shape.DimensionsCount(); + assert(axis < concat_dimensions); + + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) + { + assert(input_shapes[i]->DimensionsCount() == concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) + { + if (j != axis) + { + auto dim_checked = MatchingDim(*input_shapes[i], j, output_shape, j); + UNUSED_RELEASE(dim_checked); + } + } + concat_size += input_shapes[i]->Dims(axis); + } + assert(concat_size == output_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= output_shape.Dims(i); + } + // For all input arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < concat_dimensions; ++i) + { + base_inner_size *= output_shape.Dims(i); + } + + Scalar *output_ptr = output_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_CONCATENATION_H__ diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h new
file mode 100644 index 000000000..35b0336fa --- /dev/null +++ b/compute/cker/include/cker/operation/Conv.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_CONV_H__ +#define __NNFW_CKER_CONV_H__ + +#include "cker/Types.h" +#include "cker/Shape.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct ConvParams +{ + PaddingType padding_type; + PaddingValues padding_values; + // TODO(starka): This was just "stride", so check that width+height is OK. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + // uint8_t inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. 
+ float float_activation_min; + float float_activation_max; +}; + +inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, + const float *bias_data, const Shape &output_shape, float *output_data) +{ + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + UNUSED_RELEASE(bias_shape); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + 
for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + { + const int in_offset = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + float input_value = input_data[in_offset + in_channel]; + float filter_value = filter_data[filter_offset + in_channel]; + total += (input_value * filter_value); + } + } + } + } + float bias_value = 0.0f; + if (bias_data) + { + bias_value = bias_data[out_channel]; + } + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + ActivationFunctionWithMinMax(total + bias_value, output_activation_min, + output_activation_max); + } + } + } + } +} + +inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8_t *input_data, + const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data) +{ + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = 
params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + assert(output_activation_min <= output_activation_max); + + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + UNUSED_RELEASE(bias_shape); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
+ const int in_base = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + { + for (int in_channel = 0; in_channel < input_depth; in_channel++) + { + int32_t input_val = input_data[in_channel + in_base]; + int32_t filter_val = filter_data[in_channel + filter_base]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_channel]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast<uint8_t>(acc); + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_CONV_H__ diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h new file mode 100644 index 000000000..7d022477d --- /dev/null +++ b/compute/cker/include/cker/operation/DepthwiseConv.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __NNFW_CKER_DEPTHWISE_CONV_H__ +#define __NNFW_CKER_DEPTHWISE_CONV_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct DepthwiseConvParams +{ + PaddingType padding_type; + PaddingValues padding_values; + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + int16_t depth_multiplier; + // uint8 inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; +}; + +inline void DepthwiseConv(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &filter_shape, + const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data) +{ + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + assert(input_shape.DimensionsCount() == 4); + 
assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + assert(output_activation_min <= output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + assert(output_depth == input_depth * depth_multiplier); + assert(bias_shape.FlatSize() == output_depth); + UNUSED_RELEASE(output_depth); + UNUSED_RELEASE(bias_shape); + + for (int b = 0; b < batches; ++b) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int ic = 0; ic < input_depth; ++ic) + { + for (int m = 0; m < depth_multiplier; m++) + { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + { + int32_t input_val = input_data[Offset(input_shape, b, in_y, in_x, ic)]; + int32_t filter_val = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + if (bias_data) + { + acc += bias_data[oc]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, b, out_y, out_x, oc)] = static_cast<uint8_t>(acc); + } + } + } + } + } +} + +inline void DepthwiseConv(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &filter_shape, + const float *filter_data, const Shape &bias_shape, const float *bias_data, + const Shape &output_shape, float *output_data) +{ + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = 
filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + assert(output_depth == input_depth * depth_multiplier); + assert(bias_shape.FlatSize() == output_depth); + UNUSED_RELEASE(output_depth); + UNUSED_RELEASE(bias_shape); + + for (int b = 0; b < batches; ++b) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int ic = 0; ic < input_depth; ++ic) + { + for (int m = 0; m < depth_multiplier; m++) + { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + { + float input_value = input_data[Offset(input_shape, b, in_y, in_x, ic)]; + float filter_value = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)]; + total += (input_value * filter_value); + } + } + } + float bias_value = 0.0f; + if (bias_data) + { + bias_value = bias_data[oc]; + } + output_data[Offset(output_shape, b, out_y, out_x, oc)] = ActivationFunctionWithMinMax( + total + bias_value, output_activation_min, output_activation_max); + } + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_DEPTHWISE_CONV_H__ diff --git a/compute/cker/include/cker/operation/FullyConnected.h b/compute/cker/include/cker/operation/FullyConnected.h new file mode 100644 index 000000000..428fb1b53 --- /dev/null +++ b/compute/cker/include/cker/operation/FullyConnected.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_FULLY_CONNECTED_H__ +#define __NNFW_CKER_FULLY_CONNECTED_H__ + +#include "cker/Shape.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct FullyConnectedParams +{ + // uint8 inference params. + // TODO(b/65838351): Use smaller types if appropriate. 
+ int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; + // FullyConnectedWeightsFormat weights_format; +}; + +inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &weights_shape, + const float *weights_data, const Shape &bias_shape, + const float *bias_data, const Shape &output_shape, float *output_data) +{ + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(bias_shape); + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. 
+ const int output_dims_count = output_shape.DimensionsCount(); + const int weights_dims_count = weights_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); + const int output_depth = + MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1); + const int accum_depth = weights_shape.Dims(weights_dims_count - 1); + for (int b = 0; b < batches; ++b) + { + for (int out_c = 0; out_c < output_depth; ++out_c) + { + float total = 0.f; + for (int d = 0; d < accum_depth; ++d) + { + total += input_data[b * accum_depth + d] * weights_data[out_c * accum_depth + d]; + } + float bias_value = 0.0f; + if (bias_data) + { + bias_value = bias_data[out_c]; + } + output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax( + total + bias_value, output_activation_min, output_activation_max); + } + } +} + +inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &filter_shape, + const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, + uint8_t *output_data) +{ + UNUSED_RELEASE(input_shape); + UNUSED_RELEASE(bias_shape); + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + assert(filter_shape.DimensionsCount() >= 2); + assert(output_shape.DimensionsCount() >= 1); + + assert(output_activation_min <= output_activation_max); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime 
batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = + MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) + { + for (int out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = 0; + for (int d = 0; d < accum_depth; ++d) + { + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias_data) + { + acc += bias_data[out_c]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc); + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_FULLY_CONNECTED_H__ diff --git a/compute/cker/include/cker/operation/Gather.h b/compute/cker/include/cker/operation/Gather.h new file mode 100644 index 000000000..9cd96eeb7 --- /dev/null +++ b/compute/cker/include/cker/operation/Gather.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_GATHER_H__ +#define __NNFW_CKER_GATHER_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct GatherParams +{ + int32_t axis; +}; + +template <typename T, typename CoordsT = int32_t> +inline void Gather(const GatherParams &op_params, const Shape &input_shape, const T *input_data, + const Shape &coords_shape, const CoordsT *coords_data, const Shape &, + T *output_data) +{ + int axis = op_params.axis; + if (axis < 0) + { + axis += input_shape.DimensionsCount(); + } + assert(axis >= 0); + assert(axis < input_shape.DimensionsCount()); + const int axis_size = input_shape.Dims(axis); + const int coords_count = coords_shape.FlatSize(); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int inner_size = 1; + for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) + { + inner_size *= input_shape.Dims(i); + } + + for (int outer = 0; outer < outer_size; ++outer) + { + for (int i = 0; i < coords_count; ++i) + { + assert(coords_data[i] >= 0); + assert(coords_data[i] < axis_size); + std::memcpy(output_data + (outer * coords_count + i) * inner_size, + input_data + (outer * axis_size + coords_data[i]) * inner_size, + sizeof(T) * inner_size); + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_GATHER_H__ diff --git a/compute/cker/include/cker/operation/InstanceNorm.h b/compute/cker/include/cker/operation/InstanceNorm.h new file mode 100644 index 000000000..794dcebc8 --- 
/dev/null +++ b/compute/cker/include/cker/operation/InstanceNorm.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_INSTANCE_NORM_H__ +#define __NNFW_CKER_INSTANCE_NORM_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +#include <cmath> + +namespace nnfw +{ +namespace cker +{ + +struct InstanceNormParams +{ + float epsilon; + float float_activation_min; + float float_activation_max; +}; + +inline void InstanceNorm(const InstanceNormParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &gamma_shape, const float *gamma_data, + const Shape &beta_shape, const float *beta_data, const Shape &output_shape, + float *output_data) +{ + const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); + const int32_t heights = MatchingDim(input_shape, 1, output_shape, 1); + const int32_t widths = MatchingDim(input_shape, 2, output_shape, 2); + const int32_t channels = MatchingDim(input_shape, 3, output_shape, 3); + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + + UNUSED_RELEASE(gamma_shape); + UNUSED_RELEASE(beta_shape); + assert(output_activation_min <= output_activation_max); + + for (int32_t batch = 0; batch < batches; batch++) + { + for (int32_t channel = 0; channel < channels; channel++) + { + double 
sum = 0.0f; + double square_sum = 0.0f; + int32_t size = heights * widths; + + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_val = input_data[Offset(input_shape, batch, height, width, channel)]; + sum += input_val; + square_sum += (input_val * input_val); + } + } + + double mean = sum / size; + double var = square_sum / size - mean * mean; + + double gamma = gamma_data[channel]; + double beta = beta_data[channel]; + + double a = gamma / (std::sqrt(var + params.epsilon)); + double b = -mean * a + beta; + + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_value = input_data[Offset(output_shape, batch, height, width, channel)]; + double output_value = input_value * a + b; + output_data[Offset(output_shape, batch, height, width, channel)] = + ActivationFunctionWithMinMax((float)output_value, output_activation_min, + output_activation_max); + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_INSTANCE_NORM_H__ diff --git a/compute/cker/include/cker/operation/Logistic.h b/compute/cker/include/cker/operation/Logistic.h new file mode 100644 index 000000000..872095531 --- /dev/null +++ b/compute/cker/include/cker/operation/Logistic.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_LOGISTIC_H__ +#define __NNFW_CKER_LOGISTIC_H__ + +#include "cker/Shape.h" + +#include <cmath> + +namespace nnfw +{ +namespace cker +{ + +inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape, + float *output_data) +{ + // Note, this can be done using TANH: (1/2) + (1/2) * TANH(x/2) + const int size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < size; i++) + { + output_data[i] = 1.f / (1.f + std::exp(-input_data[i])); + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_LOGISTIC_H__ diff --git a/compute/cker/include/cker/operation/MaxPool.h b/compute/cker/include/cker/operation/MaxPool.h new file mode 100644 index 000000000..326168b99 --- /dev/null +++ b/compute/cker/include/cker/operation/MaxPool.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_MAX_POOL_H__ +#define __NNFW_CKER_MAX_POOL_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +#include "cker/operation/optimized/MaxPool.h" +#include "cker/operation/reference/MaxPool.h" + +namespace nnfw +{ +namespace cker +{ + +inline void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ +#if defined(CKER_OPTIMIZED_EIGEN) + optimized::MaxPool(params, input_shape, input_data, output_shape, output_data); +#else // defined(CKER_OPTIMIZED_EIGEN) + reference::MaxPool(params, input_shape, input_data, output_shape, output_data); +#endif // defined(CKER_OPTIMIZED_EIGEN) +} + +inline void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const uint8_t *input_data, + const Shape &output_shape, uint8_t *output_data) +{ + assert(params.quantized_activation_min <= params.quantized_activation_max); + assert(params.quantized_activation_min >= 0); + assert(params.quantized_activation_max <= 255); + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int channel = 0; channel < depth; ++channel) + { + const int in_x_origin = (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region 
clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); + uint8_t max = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max(max, input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + max = std::max<uint8_t>(max, params.quantized_activation_min); + max = std::min<uint8_t>(max, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast<uint8_t>(max); + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_MAX_POOL_H__ diff --git a/compute/cker/include/cker/operation/Pad.h b/compute/cker/include/cker/operation/Pad.h new file mode 100644 index 000000000..af432f3a8 --- /dev/null +++ b/compute/cker/include/cker/operation/Pad.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_PAD_H__ +#define __NNFW_CKER_PAD_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" +#include <stdexcept> +#include <iostream> +namespace nnfw +{ +namespace cker +{ +inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape, + const float *input_data, const Shape &output_shape, float *output_data, + const float *constant_value_data) +{ + // Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC` + // TODO: come up with more subtle solution that uses subtensors like arm compute + // TODO: Check if it works for all layouts + + using PaddingInfo = std::pair<int32_t, int32_t>; + /** List of padding information */ + using PaddingList = std::vector<PaddingInfo>; + + auto constant_value = constant_value_data ? *constant_value_data : 0; + assert(output_shape.DimensionsCount() == input_shape.DimensionsCount()); + + PaddingList padding_list(pad_rank); + for (int32_t n = 0; n < pad_rank; ++n) + { + const int32_t *from = padding_data + (n * 2); + padding_list[n] = {from[0], from[1]}; + } + for (int32_t i = 0; i < pad_rank; ++i) + { + assert(output_shape.Dims(i) == + input_shape.Dims(i) + padding_list[i].first + padding_list[i].second); + } + /* Use pad_rank since given input/output shapes are expanded to 4d before calling all cker + functions: + 1. to prevent access violation in padding_list; + 2. handling as 4d is slower than as 2d/3d. 
+ */ + switch (pad_rank) + { + case 0: + case 1: + { + const int32_t in_row_len = input_shape.Dims(0); + std::fill_n(output_data, padding_list[0].first, constant_value); + std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(float)); + std::fill_n(output_data + padding_list[0].first + in_row_len, padding_list[0].second, + constant_value); + break; + } + case 2: // HW + { + const int32_t in_row_len = input_shape.Dims(1); + const int32_t out_row_size = output_shape.Dims(1); + + // prepend padding rows + std::fill_n(output_data, padding_list[0].first * out_row_size, constant_value); + + const auto r_h_inp_lim = input_shape.Dims(0) + padding_list[0].first; + for (auto i = padding_list[0].first, j = 0; i < r_h_inp_lim; ++i, ++j) + { + auto out_offset = i * out_row_size; + const auto in_offset = j * in_row_len; + + // prepend padding values + std::fill_n(output_data + out_offset, padding_list[1].first, constant_value); + + out_offset += padding_list[1].first; + + // copy a row of input data + memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float)); + + out_offset += in_row_len; + + // append padding values + std::fill_n(output_data + out_offset, padding_list[1].second, constant_value); + } + + // append padding rows + std::fill_n(output_data + r_h_inp_lim * out_row_size, padding_list[0].second * out_row_size, + constant_value); + break; + } + case 3: // HWC + { + const int32_t in_row_len = input_shape.Dims(2); + const int32_t out_row_size = output_shape.Dims(2); + const auto plain_size = out_row_size * output_shape.Dims(1); + + // prepend padding plains + std::fill_n(output_data, padding_list[0].first * plain_size, constant_value); + + const auto r_h_inp_lim = input_shape.Dims(0) + padding_list[0].first; + for (auto i = padding_list[0].first, i_inp = 0; i < r_h_inp_lim; ++i, ++i_inp) + { + const auto out_w_offset = (i * output_shape.Dims(1) + 0) * output_shape.Dims(2); + + // prepend padding rows + 
std::fill_n(output_data + out_w_offset, padding_list[1].first * out_row_size, + constant_value); + + const auto r_w_inp_lim = input_shape.Dims(1) + padding_list[1].first; + for (auto j = padding_list[1].first, j_inp = 0; j < r_w_inp_lim; ++j, ++j_inp) + { + auto out_offset = (i * output_shape.Dims(1) + j) * output_shape.Dims(2); + const auto in_offset = (i_inp * input_shape.Dims(1) + j_inp) * input_shape.Dims(2); + + // prepend padding values + std::fill_n(output_data + out_offset, padding_list[2].first, constant_value); + + out_offset += padding_list[2].first; + + // copy a row of input data + memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float)); + + out_offset += in_row_len; + + // append padding values + std::fill_n(output_data + out_offset, padding_list[2].second, constant_value); + } + + // append padding rows + std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size, + padding_list[1].second * out_row_size, constant_value); + } + + // append padding plains + std::fill_n(output_data + r_h_inp_lim * plain_size, padding_list[0].second * plain_size, + constant_value); + break; + } + case 4: + { + auto get_offset = [](const Shape &shape, int32_t n, int32_t h, int32_t w) -> int32_t { + return ((n * shape.Dims(1) + h) * shape.Dims(2) + w) * shape.Dims(3); + }; + const int32_t in_row_len = input_shape.Dims(3); + const int32_t out_row_size = output_shape.Dims(3); + const auto plain_size = out_row_size * output_shape.Dims(2); + const auto parallelepiped_size = plain_size * output_shape.Dims(1); + + // prepend padding parallelepipeds + std::fill_n(output_data, padding_list[0].first * parallelepiped_size, constant_value); + + const auto r_b_inp_lim = input_shape.Dims(0) + padding_list[0].first; + for (auto i = padding_list[0].first, i_inp = 0; i < r_b_inp_lim; ++i, ++i_inp) + { + const auto out_h_offset = get_offset(output_shape, i, 0, 0); + // prepend padding plains + std::fill_n(output_data + out_h_offset, 
padding_list[1].first * plain_size, constant_value); + + const auto r_h_inp_lim = input_shape.Dims(1) + padding_list[1].first; + for (auto j = padding_list[1].first, j_inp = 0; j < r_h_inp_lim; ++j, ++j_inp) + { + const auto out_w_offset = get_offset(output_shape, i, j, 0); + + // prepend padding rows + std::fill_n(output_data + out_w_offset, padding_list[2].first * out_row_size, + constant_value); + + const auto r_w_inp_lim = input_shape.Dims(2) + padding_list[2].first; + for (auto k = padding_list[2].first, k_inp = 0; k < r_w_inp_lim; ++k, ++k_inp) + { + auto out_c_offset = get_offset(output_shape, i, j, k); + const auto in_offset = get_offset(input_shape, i_inp, j_inp, k_inp); + + // prepend padding values + std::fill_n(output_data + out_c_offset, padding_list[3].first, constant_value); + + out_c_offset += padding_list[3].first; + + // copy a row of input data + memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(float)); + + out_c_offset += in_row_len; + + // append padding values + std::fill_n(output_data + out_c_offset, padding_list[3].second, constant_value); + } + + // append padding rows + std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size, + padding_list[2].second * out_row_size, constant_value); + } + + // append padding plains + std::fill_n(output_data + out_h_offset + r_h_inp_lim * plain_size, + padding_list[1].second * plain_size, constant_value); + } + // append padding parallelepipeds + std::fill_n(output_data + r_b_inp_lim * parallelepiped_size, + padding_list[0].second * parallelepiped_size, constant_value); + break; + } + default: + throw std::runtime_error("Padding for rank > 4 NYI"); + break; + } +} +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_PAD_H__ diff --git a/compute/cker/include/cker/operation/SoftMax.h b/compute/cker/include/cker/operation/SoftMax.h new file mode 100644 index 000000000..ea404a002 --- /dev/null +++ b/compute/cker/include/cker/operation/SoftMax.h @@ -0,0 
+1,130 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_SOFTMAX_H__ +#define __NNFW_CKER_SOFTMAX_H__ + +#include "cker/Shape.h" +#include "cker/Utils.h" +#include "cker/Types.h" +#include "cker/gemmlowp/FixedPoint.h" +#include "cker/operation/optimized/SoftMax.h" +#include "cker/operation/reference/SoftMax.h" + +#include <cmath> + +namespace nnfw +{ +namespace cker +{ + +inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ +#if defined(CKER_OPTIMIZED_EIGEN) + optimized::Softmax(params, input_shape, input_data, output_shape, output_data); +#else // defined(CKER_OPTIMIZED_EIGEN) + reference::Softmax(params, input_shape, input_data, output_shape, output_data); +#endif // defined(CKER_OPTIMIZED_EIGEN) +} + +inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data) +{ + const int32_t input_beta_multiplier = params.input_multiplier; + const int32_t input_beta_left_shift = params.input_left_shift; + const int diff_min = params.diff_min; + // The representation chosen for the input to the exp() function is Q5.26. 
+ // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. + static const int kScaledDiffIntegerBits = 5; + static const int kAccumulationIntegerBits = 12; + using FixedPointScaledDiff = gemmlowp::FixedPoint<kScaledDiffIntegerBits>; + using FixedPointAccum = gemmlowp::FixedPoint<kAccumulationIntegerBits>; + using FixedPoint0 = gemmlowp::FixedPoint<0>; + + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + uint8_t max_in_row = 0; + for (int c = 0; c < depth; ++c) + { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) + { + int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) + { + const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int32_t fixed_sum_of_exps = sum_of_exps.raw(); + int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. 
+ int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32_t shifted_sum_minus_one = + static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast<uint32_t>(1) << 31)); + + FixedPoint0 shifted_scale = + one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one)); + + for (int c = 0; c < depth; ++c) + { + int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) + { + const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(), + num_bits_over_unit + 31 - 8); + + output_data[i * depth + c] = static_cast<uint8_t>( + std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0))); + } + else + { + output_data[i * depth + c] = 0; + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_SOFTMAX_H__ diff --git a/compute/cker/include/cker/operation/TransposeConv.h b/compute/cker/include/cker/operation/TransposeConv.h new file mode 100644 index 000000000..535fe86cf --- /dev/null +++ b/compute/cker/include/cker/operation/TransposeConv.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_TRANSPOSE_CONV_H__ +#define __NNFW_CKER_TRANSPOSE_CONV_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +struct TransposeConvParams +{ + PaddingType padding_type; + PaddingValues padding_values; + // TODO(starka): This was just "stride", so check that width+height is OK. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + // uint8_t inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. 
+ float float_activation_min; + float float_activation_max; +}; + +inline void TransposeConv(const TransposeConvParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &filter_shape, + const float *filter_data, const Shape &output_shape, float *output_data) +{ + + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; i++) + { + output_data[i] = 0.0f; + } + + // Loop through input elements one at a time. 
+ for (int batch = 0; batch < batches; ++batch) + { + for (int in_y = 0; in_y < input_height; ++in_y) + { + for (int in_x = 0; in_x < input_width; ++in_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) + { + float input_value = + input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; + float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] += + input_value * filter_value; + } + } + } + } + } + } + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_TRANSPOSE_CONV_H__ diff --git a/compute/cker/include/cker/operation/optimized/AveragePool.h b/compute/cker/include/cker/operation/optimized/AveragePool.h new file mode 100644 index 000000000..d94a5811a --- /dev/null +++ b/compute/cker/include/cker/operation/optimized/AveragePool.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
#define __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__

#if defined(CKER_OPTIMIZED_EIGEN)

#include "cker/eigen/Utils.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
#include <Eigen/Core>

namespace nnfw
{
namespace cker
{
namespace optimized
{

// Float average pooling over an NHWC tensor, implemented with Eigen.
// The input/output buffers are mapped as matrices whose columns are the
// per-(batch, y, x) depth slices, so one column operation handles a whole
// depth vector at once. A "scatter" pattern is used: each input column is
// added into every output column whose pooling window contains it, and
// out_count records how many inputs contributed to each output for the
// final division.
// TODO Change to apply neon for this function if it is faster
inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
                        const Shape &output_shape, float *output_data)
{
  assert(input_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;

  // TODO(benoitjacob) make this a proper reference impl without Eigen!
  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  // TODO(benoitjacob) get rid of the dynamic memory allocation here!
  // out_count(i) = number of input columns accumulated into output column i.
  Eigen::VectorXf out_count(out_mat.cols());
  out_count.setZero();
  // Prefill the output to 0.
  out_mat.setZero();
  for (int b = 0; b < batches; ++b)
  {
    for (int h = 0; h < input_height; ++h)
    {
      for (int w = 0; w < input_width; ++w)
      {
        // (h_start, h_end) * (w_start, w_end) is the range of output cells
        // whose pooling window contains input position (h, w).
        int hpad = h + params.padding_values.height;
        int wpad = w + params.padding_values.width;
        int h_start =
            (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
        int h_end = std::min(hpad / stride_height + 1, output_height);
        int w_start =
            (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
        int w_end = std::min(wpad / stride_width + 1, output_width);
        // Accumulate this input column into every covered output column.
        for (int ph = h_start; ph < h_end; ++ph)
        {
          for (int pw = w_start; pw < w_end; ++pw)
          {
            int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
            out_mat.col(out_offset) += in_mat.col(NodeOffset(b, h, w, input_height, input_width));
            out_count(out_offset)++;
          }
        }
      }
    }
  }
  // Divide the output by the actual number of elements being averaged over.
  // NOTE(review): this asserts every output cell received at least one input;
  // a pooling window lying entirely in padding would trip it in debug builds
  // (the reference impl skips such windows instead) -- confirm the upstream
  // padding computation rules that case out.
  assert(out_count.minCoeff() > 0);
  out_mat.array().rowwise() /= out_count.transpose().array();

  // Apply the fused activation clamp elementwise.
  const int flat_size = output_shape.FlatSize();
  for (int i = 0; i < flat_size; ++i)
  {
    output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
                                                  params.float_activation_max);
  }
}

} // namespace optimized
} // namespace cker
} // namespace nnfw

#endif // defined(CKER_OPTIMIZED_EIGEN)

#endif // __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
diff --git a/compute/cker/include/cker/operation/optimized/MaxPool.h b/compute/cker/include/cker/operation/optimized/MaxPool.h
new file mode 100644
index 000000000..07a14aee4
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/MaxPool.h
@@ -0,0 +1,97 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_OPTIMIZED_MAX_POOL_H__
#define __NNFW_CKER_OPTIMIZED_MAX_POOL_H__

#if defined(CKER_OPTIMIZED_EIGEN)
#include "cker/eigen/Utils.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
#include <Eigen/Core>

namespace nnfw
{
namespace cker
{
namespace optimized
{

// Float max pooling over an NHWC tensor, implemented with Eigen.
// The input/output buffers are mapped as matrices whose columns are the
// per-(batch, y, x) depth slices. A "scatter" pattern is used: each input
// column is max-combined into every output column whose pooling window
// contains it. The output is prefilled with the lowest float so the first
// contribution always wins.
// TODO Change to apply neon for this function if it is faster
inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
                    const Shape &output_shape, float *output_data)
{
  assert(input_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;

  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  // Prefill the output to minimum representable float value
  out_mat.setConstant(std::numeric_limits<float>::lowest());
  for (int b = 0; b < batches; ++b)
  {
    for (int h = 0; h < input_height; ++h)
    {
      for (int w = 0; w < input_width; ++w)
      {
        // (h_start, h_end) * (w_start, w_end) is the range of output cells
        // whose pooling window contains input position (h, w).
        int hpad = h + params.padding_values.height;
        int wpad = w + params.padding_values.width;
        int h_start =
            (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
        int h_end = std::min(hpad / stride_height + 1, output_height);
        int w_start =
            (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
        int w_end = std::min(wpad / stride_width + 1, output_width);
        // Take the elementwise max (comment previously said "sum", a
        // copy-paste from the AveragePool kernel).
        for (int ph = h_start; ph < h_end; ++ph)
        {
          for (int pw = w_start; pw < w_end; ++pw)
          {
            int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
            out_mat.col(out_offset) =
                out_mat.col(out_offset)
                    .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
          }
        }
      }
    }
  }
  // Apply the fused activation clamp elementwise.
  const int flat_size = output_shape.FlatSize();
  for (int i = 0; i < flat_size; ++i)
  {
    output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
                                                  params.float_activation_max);
  }
}

} // namespace optimized
} // namespace cker
} // namespace nnfw

#endif // defined(CKER_OPTIMIZED_EIGEN)

#endif // __NNFW_CKER_OPTIMIZED_MAX_POOL_H__
diff --git a/compute/cker/include/cker/operation/optimized/SoftMax.h b/compute/cker/include/cker/operation/optimized/SoftMax.h
new file mode 100644
index 000000000..e44f251d0
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/SoftMax.h
@@ -0,0 +1,59 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_OPTIMIZED_SOFTMAX_H__
#define __NNFW_CKER_OPTIMIZED_SOFTMAX_H__

#if defined(CKER_OPTIMIZED_EIGEN)

#include "cker/eigen/Utils.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include <Eigen/Core>

namespace nnfw
{
namespace cker
{
namespace optimized
{

// Softmax along the last dimension, implemented with Eigen.
// The buffers are mapped as matrices whose columns are the last-dim slices,
// so each column is an independent softmax. The logits are scaled by
// params.beta before exponentiation.
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
                    const Shape &output_shape, float *output_data)
{
  // Validate that input and output have the same flat size.
  MatchingFlatSize(input_shape, output_shape);

  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  // Compute the exponential first, removing the max coefficient for numerical
  // stability: exp(x - max) avoids overflow and leaves the result unchanged
  // after normalization.
  out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
  // We are separating out the exp function so that exp can be vectorized.
  out_mat = out_mat.array().exp();
  // Normalize to get the activations: divide each column by its sum.
  Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
  out_mat.array().rowwise() *= scale;
}

} // namespace optimized
} // namespace cker
} // namespace nnfw

#endif // defined(CKER_OPTIMIZED_EIGEN)

#endif // __NNFW_CKER_OPTIMIZED_SOFTMAX_H__
diff --git a/compute/cker/include/cker/operation/reference/AveragePool.h b/compute/cker/include/cker/operation/reference/AveragePool.h
new file mode 100644
index 000000000..3ddab4b24
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/AveragePool.h
@@ -0,0 +1,90 @@
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#ifndef __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__ +#define __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ +namespace reference +{ + +inline void AveragePool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + const int in_x_origin = (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); + int filter_count = (filter_y_end - filter_y_start) * (filter_x_end - filter_x_start); + if (filter_count <= 0) + { + continue; + } + for (int channel = 0; channel < depth; ++channel) + { + float total = 0.f; + for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + total += input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + } + } + const float average = total / (float)filter_count; + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + ActivationFunctionWithMinMax(average, params.float_activation_min, + params.float_activation_max); + } + } + } + } +} + +} // namespace reference +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_REFERENCE_AVERAGE_POOL_H__ diff --git a/compute/cker/include/cker/operation/reference/MaxPool.h b/compute/cker/include/cker/operation/reference/MaxPool.h new file mode 100644 index 000000000..a0f0263c7 --- /dev/null +++ b/compute/cker/include/cker/operation/reference/MaxPool.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_REFERENCE_MAX_POOL_H__ +#define __NNFW_CKER_REFERENCE_MAX_POOL_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ +namespace reference +{ + +inline void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int channel = 0; channel < depth; ++channel) + { + const int in_x_origin = (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); + float max = std::numeric_limits<float>::lowest(); + for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max(max, input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + ActivationFunctionWithMinMax(max, params.float_activation_min, + params.float_activation_max); + } + } + } + } +} + +} // namespace reference +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_REFERENCE_MAX_POOL_H__ diff --git a/compute/cker/include/cker/operation/reference/SoftMax.h b/compute/cker/include/cker/operation/reference/SoftMax.h new file mode 100644 index 000000000..420cb319b --- /dev/null +++ b/compute/cker/include/cker/operation/reference/SoftMax.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_REFERENCE_SOFTMAX_H__ +#define __NNFW_CKER_REFERENCE_SOFTMAX_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" + +#include <cmath> + +namespace nnfw +{ +namespace cker +{ +namespace reference +{ + +inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const float *input_data, + const Shape &output_shape, float *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) + float max = std::numeric_limits<float>::lowest(); + for (int c = 0; c < depth; ++c) + { + max = std::max(max, input_data[i * depth + c]); + } + + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) + { + sum += std::exp((input_data[i * depth + c] - max) * params.beta); + } + + // Compute result. + for (int c = 0; c < depth; ++c) + { + output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) * params.beta) / sum; + } + } +} + +} // namespace reference +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_REFERENCE_SOFTMAX_H__ |