summaryrefslogtreecommitdiff
path: root/compiler/locomotiv/src/Node
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/locomotiv/src/Node')
-rw-r--r--compiler/locomotiv/src/Node/AvgPool2D.cpp179
-rw-r--r--compiler/locomotiv/src/Node/AvgPool2D.test.cpp176
-rw-r--r--compiler/locomotiv/src/Node/BiasAdd.cpp121
-rw-r--r--compiler/locomotiv/src/Node/BiasAdd.test.cpp204
-rw-r--r--compiler/locomotiv/src/Node/BiasEncode.cpp63
-rw-r--r--compiler/locomotiv/src/Node/BiasEncode.test.cpp95
-rw-r--r--compiler/locomotiv/src/Node/ConstGen.cpp116
-rw-r--r--compiler/locomotiv/src/Node/ConstGen.test.cpp100
-rw-r--r--compiler/locomotiv/src/Node/Conv2D.cpp179
-rw-r--r--compiler/locomotiv/src/Node/Conv2D.test.cpp231
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseConv2D.cpp185
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp164
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp113
-rw-r--r--compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp90
-rw-r--r--compiler/locomotiv/src/Node/EltwiseAdd.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseAdd.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseDiv.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseDiv.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMax.cpp36
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMax.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMul.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseMul.test.cpp124
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSqrt.cpp43
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp69
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSub.cpp34
-rw-r--r--compiler/locomotiv/src/Node/EltwiseSub.test.cpp121
-rw-r--r--compiler/locomotiv/src/Node/FeatureCodec.test.cpp223
-rw-r--r--compiler/locomotiv/src/Node/FeatureDecode.cpp112
-rw-r--r--compiler/locomotiv/src/Node/FeatureEncode.cpp114
-rw-r--r--compiler/locomotiv/src/Node/FilterEncode.cpp114
-rw-r--r--compiler/locomotiv/src/Node/FilterEncode.test.cpp144
-rw-r--r--compiler/locomotiv/src/Node/Forward.cpp62
-rw-r--r--compiler/locomotiv/src/Node/Forward.test.cpp88
-rw-r--r--compiler/locomotiv/src/Node/MatMul.cpp133
-rw-r--r--compiler/locomotiv/src/Node/MatMul.test.cpp188
-rw-r--r--compiler/locomotiv/src/Node/MatrixCodec.test.cpp207
-rw-r--r--compiler/locomotiv/src/Node/MatrixDecode.cpp109
-rw-r--r--compiler/locomotiv/src/Node/MatrixEncode.cpp112
-rw-r--r--compiler/locomotiv/src/Node/MaxPool2D.cpp167
-rw-r--r--compiler/locomotiv/src/Node/MaxPool2D.test.cpp159
-rw-r--r--compiler/locomotiv/src/Node/Pull.cpp72
-rw-r--r--compiler/locomotiv/src/Node/Pull.test.cpp61
-rw-r--r--compiler/locomotiv/src/Node/Push.cpp61
-rw-r--r--compiler/locomotiv/src/Node/Push.test.cpp88
-rw-r--r--compiler/locomotiv/src/Node/ReLU.cpp41
-rw-r--r--compiler/locomotiv/src/Node/ReLU.test.cpp62
-rw-r--r--compiler/locomotiv/src/Node/ReLU6.cpp96
-rw-r--r--compiler/locomotiv/src/Node/ReLU6.test.cpp66
-rw-r--r--compiler/locomotiv/src/Node/Reshape.cpp90
-rw-r--r--compiler/locomotiv/src/Node/Reshape.test.cpp67
-rw-r--r--compiler/locomotiv/src/Node/Softmax.cpp122
-rw-r--r--compiler/locomotiv/src/Node/Softmax.test.cpp68
-rw-r--r--compiler/locomotiv/src/Node/Tanh.cpp41
-rw-r--r--compiler/locomotiv/src/Node/Tanh.test.cpp64
-rw-r--r--compiler/locomotiv/src/Node/TensorBroadcast.cpp106
-rw-r--r--compiler/locomotiv/src/Node/TensorBroadcast.test.cpp63
-rw-r--r--compiler/locomotiv/src/Node/TensorConcat.cpp113
-rw-r--r--compiler/locomotiv/src/Node/TensorConcat.test.cpp128
-rw-r--r--compiler/locomotiv/src/Node/TensorConstantPad.cpp113
-rw-r--r--compiler/locomotiv/src/Node/TensorConstantPad.test.cpp218
-rw-r--r--compiler/locomotiv/src/Node/TensorReduce.cpp153
-rw-r--r--compiler/locomotiv/src/Node/TensorReduce.test.cpp104
-rw-r--r--compiler/locomotiv/src/Node/TransposedConv2D.cpp189
-rw-r--r--compiler/locomotiv/src/Node/TransposedConv2D.test.cpp144
64 files changed, 7170 insertions, 0 deletions
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.cpp b/compiler/locomotiv/src/Node/AvgPool2D.cpp
new file mode 100644
index 000000000..ad603badf
--- /dev/null
+++ b/compiler/locomotiv/src/Node/AvgPool2D.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param image_size  Input extent along this axis
+ * @param whole_pad   Sum of front and back pad
+ * @param filter_size Window extent along this axis
+ * @param stride      Step between consecutive windows; must be non-zero
+ * @return (image_size + whole_pad - filter_size) / stride + 1
+ *
+ * @note Preconditions are checked with assert only, so they are not enforced under NDEBUG.
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+                                 uint32_t stride)
+{
+  // A zero stride would make the modulo and the division below undefined.
+  assert(stride > 0);
+  // The arithmetic is unsigned: a window larger than the padded input would wrap around.
+  assert(image_size + whole_pad >= filter_size);
+
+  const uint32_t span = image_size + whole_pad - filter_size;
+  assert(span % stride == 0);
+  return span / stride + 1;
+}
+
+/**
+ * @brief Average-pool a rank-4 feature map.
+ *
+ * Each output element is the sum of the input elements covered by its window divided by the
+ * number of elements actually covered, so positions that fall into the padding are not counted.
+ * Dimensions are read as dim(0)=batch, dim(1)=height, dim(2)=width, dim(3)=depth and the output
+ * uses the same ordering.
+ *
+ * @param avgpool2d Node supplying window, stride, pad and convention
+ * @param ifm_buf   Input feature map buffer
+ * @return Newly created buffer of shape {batch, out_height, out_width, depth}
+ * @throws std::runtime_error if the convention is not Convention::Valid
+ */
+template <typename T>
+nncc::core::ADT::tensor::Buffer<T> avgPool2D(const loco::AvgPool2D *avgpool2d,
+                                             const Buffer<T> *ifm_buf)
+{
+  assert(avgpool2d->convention() == loco::AvgPool2D::Convention::Valid ||
+         avgpool2d->convention() == loco::AvgPool2D::Convention::Full);
+
+  // Only Valid is implemented. Reject anything else once, before allocating or iterating,
+  // rather than from inside the innermost loop where an empty output would never reach it.
+  if (avgpool2d->convention() != loco::AvgPool2D::Convention::Valid)
+  {
+    throw std::runtime_error("TODO support AvgPool2D::Convention::Full");
+  }
+
+  const auto &ifm_shape = ifm_buf->shape();
+
+  const uint32_t batches = ifm_shape.dim(0);
+  const uint32_t ifm_height = ifm_shape.dim(1);
+  const uint32_t ifm_width = ifm_shape.dim(2);
+  const uint32_t depth = ifm_shape.dim(3);
+
+  const uint32_t window_height = avgpool2d->window()->vertical();
+  const uint32_t window_width = avgpool2d->window()->horizontal();
+
+  const uint32_t stride_height = avgpool2d->stride()->vertical();
+  const uint32_t stride_width = avgpool2d->stride()->horizontal();
+
+  const uint32_t pad_top = avgpool2d->pad()->top();
+  const uint32_t pad_bottom = avgpool2d->pad()->bottom();
+
+  const uint32_t pad_left = avgpool2d->pad()->left();
+  const uint32_t pad_right = avgpool2d->pad()->right();
+
+  const uint32_t output_height =
+    compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+  const uint32_t output_width =
+    compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+
+  // prepare output buffer
+  Shape output_shape{batches, output_height, output_width, depth};
+  auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      // Window origin in input coordinates; negative while the window overlaps the top pad.
+      const int in_y_origin = static_cast<int>(out_y * stride_height) - static_cast<int>(pad_top);
+      // Clamp the window so that only rows inside the input are visited.
+      const uint32_t filter_y_start = std::max(0, -in_y_origin);
+      const uint32_t filter_y_end = std::min(window_height, ifm_height - in_y_origin);
+
+      for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        const int in_x_origin = static_cast<int>(out_x * stride_width) - static_cast<int>(pad_left);
+        const uint32_t filter_x_start = std::max(0, -in_x_origin);
+        const uint32_t filter_x_end = std::min(window_width, ifm_width - in_x_origin);
+
+        // The clamped window depends only on (out_y, out_x); it is shared by every channel.
+        for (uint32_t channel = 0; channel < depth; ++channel)
+        {
+          T total = 0;
+          uint32_t filter_ele_count = 0;
+
+          for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+          {
+            const uint32_t in_y = in_y_origin + filter_y;
+            for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+            {
+              const uint32_t in_x = in_x_origin + filter_x;
+              total += ifm_buf->at(Index({batch, in_y, in_x, channel}));
+              ++filter_ele_count;
+            }
+          }
+
+          // A window lying completely inside the padding has nothing to average.
+          assert(filter_ele_count > 0);
+          output_buf.at(Index({batch, out_y, out_x, channel})) = total / filter_ele_count;
+        }
+      }
+    }
+  }
+
+  return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Run AvgPool2D on the data annotated on its input and annotate the node with the result.
+ *
+ * The input must be annotated, rank 4 and in the Feature domain; only FLOAT32 is implemented.
+ */
+void NodeExecution::execute(loco::AvgPool2D *avgpool2d)
+{
+  auto input = annot_data(avgpool2d->ifm());
+
+  validate(input, "Can't find input data of AvgPool2D");
+  validate(input->shape()->rank() == 4, "IFM rank should be 4");
+  validate(annot_domain(avgpool2d->ifm()) == loco::Domain::Feature,
+           "ifm of AvgPool2D is not Feature");
+
+  // Every data type other than FLOAT32 is rejected
+  if (input->dtype() != loco::DataType::FLOAT32)
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  auto pooled_buf = avgPool2D<float>(avgpool2d, input->as_f32_bufptr());
+  std::unique_ptr<NodeData> pooled = make_data(pooled_buf);
+
+  assert(pooled != nullptr);
+
+  annot_data(avgpool2d, std::move(pooled));
+  annot_domain(avgpool2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.test.cpp b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
new file mode 100644
index 000000000..89e10a35e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Run one AvgPool2D node (Valid convention) on `ifm` and compare with `expected_ofm`.
+ *
+ * The input is attached to a FeatureEncode node as ready-made Feature data, so the only node
+ * executed here is the AvgPool2D itself.
+ */
+void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h,
+              const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top,
+              const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right)
+{
+  using nncc::core::ADT::tensor::IndexEnumerator;
+
+  // Graph: FeatureEncode -> AvgPool2D
+  auto graph = loco::make_graph();
+  auto source = graph->nodes()->create<loco::FeatureEncode>();
+
+  // Copy the raw input array into a buffer that can be attached to the source node
+  auto source_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+  auto ifm_view = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+  for (IndexEnumerator it{ifm_shape}; it.valid(); it.advance())
+  {
+    source_buf.at(it.current()) = ifm_view.at(it.current());
+  }
+
+  locomotiv::annot_data(source, locomotiv::make_data(source_buf));
+  locomotiv::annot_domain(source, loco::Domain::Feature);
+
+  // Node under test
+  auto pool = graph->nodes()->create<loco::AvgPool2D>();
+  pool->ifm(source);
+  pool->convention(loco::AvgPool2D::Convention::Valid);
+  pool->window()->vertical(window_v);
+  pool->window()->horizontal(window_h);
+  pool->stride()->vertical(stride_v);
+  pool->stride()->horizontal(stride_h);
+  pool->pad()->top(pad_top);
+  pool->pad()->bottom(pad_bottom);
+  pool->pad()->left(pad_left);
+  pool->pad()->right(pad_right);
+
+  // Execute only the pooling node
+  locomotiv::NodeExecution::get().run(pool);
+
+  // Inspect what the interpreter annotated on the node
+  auto pool_data = locomotiv::annot_data(pool);
+
+  ASSERT_NE(pool_data, nullptr);
+  ASSERT_TRUE(pool_data->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(pool_data->shape()) == ofm_shape);
+
+  auto expected_view =
+    make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (IndexEnumerator it{ofm_shape}; it.valid(); it.advance())
+  {
+    ASSERT_FLOAT_EQ(pool_data->as_f32_bufptr()->at(it.current()),
+                    expected_view.at(it.current()));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(pool), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+import tensorflow as tf
+
+value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261], [-1.1777412]],
+ [[1.4411974], [0.01408334], [0.06958964], [-0.08663343]],
+ [[1.3424183], [-0.89015573], [0.2520576], [0.04843695]],
+ [[-1.6668711], [-0.02187406], [1.9362065], [1.3341236]]]])
+avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding= 'VALID',
+ data_format="NHWC")
+with tf.Session() as sess:
+ print(sess.run(avgpool))
+*/
+TEST(NodeExecution_AvgPool2D, f32_1x4x4x1_calculation)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  // 4x4 single-channel input
+  const float input[] =
+  {
+    -0.281157,   -1.0601869,  -0.622261,   -1.1777412,
+     1.4411974,   0.01408334,  0.06958964, -0.08663343,
+     1.3424183,  -0.89015573,  0.2520576,   0.04843695,
+    -1.6668711,  -0.02187406,  1.9362065,   1.3341236
+  };
+
+  // One average per non-overlapping 2x2 quadrant
+  const float expected[] =
+  {
+     0.02848421, -0.45426148,
+    -0.30912063,  0.89270616
+  };
+
+  const Shape input_shape{1, 4, 4, 1};
+  const Shape output_shape{1, 2, 2, 1};
+
+  run_test(input, expected,
+           input_shape, output_shape,
+           2, 2,       // window (vertical, horizontal)
+           2, 2,       // stride (vertical, horizontal)
+           0, 0, 0, 0  // pad (top, bottom, left, right)
+  );
+}
+// clang-format on
+
+// clang-format off
+/* ifm and ofm are from the code below:
+import tensorflow as tf
+
+value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261]],
+ [[1.4411974], [0.01408334], [0.06958964]],
+ [[1.3424183], [-0.89015573], [0.2520576]]]])
+avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'SAME',
+ data_format="NHWC")
+with tf.Session() as sess:
+ print(sess.run(avgpool))
+*/
+TEST(NodeExecution_AvgPool2D, f32_1x3x3x1_calculation)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  // 3x3 single-channel input
+  const float input[] =
+  {
+    -0.281157,  -1.0601869,  -0.622261,
+     1.4411974,  0.01408334,  0.06958964,
+     1.3424183, -0.89015573,  0.2520576
+  };
+
+  // Same spatial size as the input: one row of bottom pad and one column of right pad
+  const float expected[] =
+  {
+    0.02848421, -0.39969373, -0.2763357,
+    0.4768858,  -0.13860628,  0.16082363,
+    0.22613129, -0.31904906,  0.2520576
+  };
+
+  const Shape input_shape{1, 3, 3, 1};
+  const Shape output_shape{1, 3, 3, 1};
+
+  run_test(input, expected,
+           input_shape, output_shape,
+           2, 2,       // window (vertical, horizontal)
+           1, 1,       // stride (vertical, horizontal)
+           0, 1, 0, 1  // pad (top, bottom, left, right)
+  );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp
new file mode 100644
index 000000000..0724fb728
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasAdd.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using locomotiv::NodeData;
+
+// Shared by both BiasAdd overloads below; the definition follows them in this file.
+std::unique_ptr<NodeData> calc(const NodeData *input_data,
+                               const NodeData *bias_data,
+                               uint32_t axis);
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute BiasAdd on a Tensor-domain value, adding the bias along the node's axis.
+ */
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+  auto value_data = locomotiv::annot_data(bias_add->value());
+  auto bias_data = locomotiv::annot_data(bias_add->bias());
+
+  // Both operands must already carry data, in the expected domains
+  validate(value_data && bias_data, "Input not ready");
+  validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor &&
+             locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+           "Wrong input domain");
+
+  auto result = calc(value_data, bias_data, bias_add->axis());
+  assert(result != nullptr);
+
+  annot_data(bias_add, std::move(result));
+  // The output is annotated with whatever domain the value operand has
+  annot_domain(bias_add, annot_domain(bias_add->value()));
+}
+
+/**
+ * @brief Execute BiasAdd on a Feature-domain value; the bias is always applied along axis 3.
+ */
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+  // NOTE(review): presumably the depth (channel) dimension of Feature data - confirm
+  const uint32_t bias_axis = 3;
+
+  auto value_data = locomotiv::annot_data(bias_add->value());
+  auto bias_data = locomotiv::annot_data(bias_add->bias());
+
+  // Both operands must already carry data, in the expected domains
+  validate(value_data && bias_data, "Input not ready");
+  validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature &&
+             locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias,
+           "Wrong input domain");
+
+  auto result = calc(value_data, bias_data, bias_axis);
+  assert(result != nullptr);
+
+  annot_data(bias_add, std::move(result));
+  annot_domain(bias_add, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
+
+namespace
+{
+using locomotiv::NodeData;
+using locomotiv::validate;
+using locomotiv::make_data;
+
+/**
+ * @brief Add a rank-1 bias to every element of the input, choosing the bias entry by the
+ *        element's coordinate along `axis`.
+ *
+ * @param input_data Input values; only FLOAT32 is implemented
+ * @param bias_data  Bias values; must be rank 1, of the input's dtype, and as long as the
+ *                   input's extent along `axis`
+ * @param axis       Input dimension the bias is applied along; must be below the input rank
+ * @return Newly created data with the input's shape
+ * @throws std::runtime_error for data types other than FLOAT32; argument checks go through
+ *         validate()
+ */
+std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_data, uint32_t axis)
+{
+  const auto *input_shape = input_data->shape();
+  const auto *bias_shape = bias_data->shape();
+
+  // Check the arguments before they are used for indexing: the axis selects an input
+  // dimension, and the bias is addressed below with a rank-1 index.
+  validate(axis < input_shape->rank(), "Bias axis out of range");
+  validate(bias_shape->rank() == 1, "Bias rank must be 1");
+  validate(input_shape->dim(axis) == bias_shape->dim(0), "Bias size mismatch");
+  // The bias buffer is read with the accessor chosen from the input's dtype.
+  validate(bias_data->dtype() == input_data->dtype(), "Bias dtype mismatch");
+
+  std::unique_ptr<NodeData> bias_add_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto bias_bufptr = bias_data->as_f32_bufptr();
+      auto bias_add_buf = make_buffer<float, LexicalLayout>(*input_shape);
+
+      for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        // The coordinate along `axis` picks the bias entry for this element
+        nncc::core::ADT::tensor::Index bias_index({index.at(axis)});
+        bias_add_buf.at(index) = input_bufptr->at(index) + bias_bufptr->at(bias_index);
+      }
+
+      bias_add_data = make_data(bias_add_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+  return bias_add_data;
+}
+
+} // namespace
diff --git a/compiler/locomotiv/src/Node/BiasAdd.test.cpp b/compiler/locomotiv/src/Node/BiasAdd.test.cpp
new file mode 100644
index 000000000..0ca826673
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasAdd.test.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+ inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+ bias = tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32)
+ out = tf.nn.bias_add(inp, bias)
+
+ with tf.Session() as sess:
+ print(sess.run(out))
+ */
+
+TEST(NodeExecution_TensorBiasAdd, f32)
+{
+  float values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float biases[] = {1.1, 2.1};
+  float expected[] = {2.1,  4.1,  4.1,  6.1,  6.1,  8.1,  8.1,  10.1, 10.1,
+                      12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1};
+
+  // Graph: BiasAdd(Pull, BiasEncode)
+  auto graph = loco::make_graph();
+  Shape value_shape{1, 3, 3, 2}; // NHWC
+
+  auto pull = graph->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({1, 3, 3, 2});
+
+  // No attributes are set on the bias node; its data is annotated directly below
+  auto bias_enc = graph->nodes()->create<loco::BiasEncode>();
+
+  auto bias_add = graph->nodes()->create<loco::BiasAdd<loco::Domain::Tensor>>();
+  bias_add->value(pull);
+  bias_add->bias(bias_enc);
+  bias_add->axis(3); // axis(3) means C in NHWC
+
+  // Fill the value buffer in enumeration order
+  auto value_buf = make_buffer<float, LexicalLayout>(value_shape);
+  {
+    const float *next = values;
+    for (IndexEnumerator e{value_buf.shape()}; e.valid(); e.advance())
+    {
+      value_buf.at(e.current()) = *next++;
+    }
+  }
+
+  // Fill the bias buffer in enumeration order
+  auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2});
+  {
+    const float *next = biases;
+    for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance())
+    {
+      bias_buf.at(e.current()) = *next++;
+    }
+  }
+
+  // Annotate both operands as if they had already been executed
+  locomotiv::annot_data(pull, locomotiv::make_data(value_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::annot_data(bias_enc, locomotiv::make_data(bias_buf));
+  locomotiv::annot_domain(bias_enc, loco::Domain::Bias);
+
+  locomotiv::NodeExecution::get().run(bias_add);
+
+  // Compare the annotated result with the expected values
+  auto result = locomotiv::annot_data(bias_add);
+
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape({1, 3, 3, 2}));
+
+  const float *want = expected;
+  for (IndexEnumerator e{*(result->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(e.current()), *want++);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(bias_add), loco::Domain::Tensor);
+}
+
+/*
+test case generated from the following:
+
+ inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+ bias = tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32)
+ out = tf.nn.bias_add(inp, bias)
+
+ with tf.Session() as sess:
+ print(sess.run(out))
+ */
+
+TEST(NodeExecution_FeatureBiasAdd, f32)
+{
+  float values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float biases[] = {1.1, 2.1};
+  float expected[] = {2.1,  4.1,  4.1,  6.1,  6.1,  8.1,  8.1,  10.1, 10.1,
+                      12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1};
+
+  // Graph: FeatureBiasAdd(FeatureEncode, BiasEncode)
+  auto graph = loco::make_graph();
+  Shape value_shape{1, 3, 3, 2}; // NHWC
+
+  // No attributes are set on either source node; their data is annotated directly below
+  auto feature_enc = graph->nodes()->create<loco::FeatureEncode>();
+  auto bias_enc = graph->nodes()->create<loco::BiasEncode>();
+
+  auto feature_bias_add = graph->nodes()->create<loco::BiasAdd<loco::Domain::Feature>>();
+  feature_bias_add->value(feature_enc);
+  feature_bias_add->bias(bias_enc);
+
+  // Fill the feature buffer in enumeration order
+  auto value_buf = make_buffer<float, LexicalLayout>(value_shape);
+  {
+    const float *next = values;
+    for (IndexEnumerator e{value_buf.shape()}; e.valid(); e.advance())
+    {
+      value_buf.at(e.current()) = *next++;
+    }
+  }
+
+  // Fill the bias buffer in enumeration order
+  auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2});
+  {
+    const float *next = biases;
+    for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance())
+    {
+      bias_buf.at(e.current()) = *next++;
+    }
+  }
+
+  // Annotate both operands as if they had already been executed
+  locomotiv::annot_data(feature_enc, locomotiv::make_data(value_buf));
+  locomotiv::annot_domain(feature_enc, loco::Domain::Feature);
+
+  locomotiv::annot_data(bias_enc, locomotiv::make_data(bias_buf));
+  locomotiv::annot_domain(bias_enc, loco::Domain::Bias);
+
+  locomotiv::NodeExecution::get().run(feature_bias_add);
+
+  // Compare the annotated result with the expected values
+  auto result = locomotiv::annot_data(feature_bias_add);
+
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape({1, 3, 3, 2}));
+
+  const float *want = expected;
+  for (IndexEnumerator e{*(result->shape())}; e.valid(); e.advance())
+  {
+    ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(e.current()), *want++);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(feature_bias_add), loco::Domain::Feature);
+}
diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp
new file mode 100644
index 000000000..c2f2b44c0
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasEncode.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute BiasEncode: pass the rank-1 Tensor input on as Bias-domain data.
+ *
+ * S32 and FLOAT32 inputs are handled; any other data type throws std::runtime_error.
+ */
+void NodeExecution::execute(loco::BiasEncode *bias_enc)
+{
+  auto source = annot_data(bias_enc->input());
+
+  validate(source, "Input not ready");
+  validate(annot_domain(bias_enc->input()) == loco::Domain::Tensor,
+           "Input domain should be Tensor");
+  validate(source->shape()->rank() == 1, "Input data rank must be 1");
+
+  // The values are not transformed: the input buffer is handed to make_data as it is
+  std::unique_ptr<NodeData> encoded = nullptr;
+
+  const auto dtype = source->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    encoded = make_data(*source->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    encoded = make_data(*source->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(encoded != nullptr);
+  annot_data(bias_enc, std::move(encoded));
+  annot_domain(bias_enc, loco::Domain::Bias);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/BiasEncode.test.cpp b/compiler/locomotiv/src/Node/BiasEncode.test.cpp
new file mode 100644
index 000000000..73e2af8a8
--- /dev/null
+++ b/compiler/locomotiv/src/Node/BiasEncode.test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Buffer;
+
+namespace
+{
+// Map a C++ element type to its loco::DataType. Types without a specialization throw when
+// the function is called.
+template <typename T> loco::DataType loco_dtype()
+{
+  throw std::runtime_error("Not supported yet");
+}
+
+template <> loco::DataType loco_dtype<int32_t>()
+{
+  return loco::DataType::S32;
+}
+
+template <> loco::DataType loco_dtype<float>()
+{
+  return loco::DataType::FLOAT32;
+}
+
+// Fetch the typed buffer pointer of a NodeData for element type T. Types without a
+// specialization throw when the function is called.
+template <typename T> const Buffer<T> *as_bufptr(const locomotiv::NodeData * /* data */)
+{
+  throw std::runtime_error("Not supported yet");
+}
+
+template <> const Buffer<int32_t> *as_bufptr<int32_t>(const locomotiv::NodeData *node_data)
+{
+  return node_data->as_s32_bufptr();
+}
+
+template <> const Buffer<float> *as_bufptr<float>(const locomotiv::NodeData *node_data)
+{
+  return node_data->as_f32_bufptr();
+}
+
+/**
+ * @brief Run BiasEncode on a one-element Tensor of type T and check the resulting dtype,
+ *        shape, value and domain.
+ */
+template <typename T> void test()
+{
+  const auto dtype = loco_dtype<T>();
+  const Shape one_elem{1};
+  const Index first{0};
+
+  // Graph: Pull -> BiasEncode
+  auto graph = loco::make_graph();
+
+  auto pull = graph->nodes()->create<loco::Pull>();
+  pull->dtype(dtype);
+  pull->shape({1});
+
+  auto bias_enc = graph->nodes()->create<loco::BiasEncode>();
+  bias_enc->input(pull);
+
+  // Annotate the pull node with a single known value in the Tensor domain
+  auto pull_buf = make_buffer<T, LexicalLayout>(one_elem);
+  pull_buf.at(first) = static_cast<T>(100);
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(bias_enc);
+
+  // The encoded bias must carry the same type, shape and value, now in the Bias domain
+  auto encoded = locomotiv::annot_data(bias_enc);
+
+  ASSERT_NE(encoded, nullptr);
+  ASSERT_EQ(encoded->dtype(), dtype);
+  ASSERT_EQ(*(encoded->shape()), one_elem);
+  ASSERT_EQ(as_bufptr<T>(encoded)->at(first), pull_buf.at(first));
+
+  ASSERT_EQ(locomotiv::annot_domain(bias_enc), loco::Domain::Bias);
+}
+} // namespace
+
+// BiasEncode over 32-bit signed integer data
+TEST(NodeExecution_BiasEncode, s32)
+{
+  test<int32_t>();
+}
+
+// BiasEncode over 32-bit float data
+TEST(NodeExecution_BiasEncode, f32)
+{
+  test<float>();
+}
diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp
new file mode 100644
index 000000000..0360b9fef
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ConstGen.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <stdexcept>
+#include <cassert>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+namespace
+{
+
+/**
+ * @brief Flat offset of an index within a shape, using the lexical layout
+ *
+ * e.g.
+ *   shape {3, 4},    index {1, 2}    -> 1 * 4 + 2               = 6
+ *   shape {2, 3, 4}, index {1, 0, 2} -> 1 * (3 * 4) + 0 * 4 + 2 = 14
+ */
+inline uint32_t offset_by_index(const Shape &shape, const Index &index)
+{
+  // One layout object is shared by every call
+  static const LexicalLayout lexical_layout;
+  const uint32_t flat_offset = lexical_layout.offset(shape, index);
+  return flat_offset;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+namespace
+{
+
+/**
+ * @brief Copy the elements of a ConstGen node into a newly allocated NodeData
+ *
+ * @tparam DT     loco data type tag used to access the ConstGen storage
+ * @tparam T      C++ element type of the buffer to create
+ * @param constgen node whose elements are read through size<DT>() / at<DT>()
+ * @param shape    tensor shape of the node
+ * @param volume   number of elements implied by shape
+ * @throws std::runtime_error when the node does not hold exactly volume elements
+ */
+template <loco::DataType DT, typename T>
+std::unique_ptr<NodeData> make_constgen_data(loco::ConstGen *constgen, const Shape &shape,
+                                             uint64_t volume)
+{
+  // Checked at run time (not only via assert) so that an inconsistent node never leads to
+  // element reads beyond what the node stores in builds with NDEBUG
+  if (volume != constgen->size<DT>())
+    throw std::runtime_error("ConstGen shape does not match its number of elements");
+
+  auto buf = make_buffer<T, LexicalLayout>(shape);
+
+  for (IndexEnumerator e{shape}; e.valid(); e.advance())
+  {
+    const auto &index = e.current();
+    // ConstGen elements are addressed by flat offset, the buffer by index
+    const uint32_t offset = ::offset_by_index(shape, index);
+    buf.at(index) = constgen->at<DT>(offset);
+  }
+
+  return make_data(buf);
+}
+
+} // namespace
+
+/**
+ * @brief Materialize the constant stored in a ConstGen node
+ *
+ * Builds a buffer of the node's shape, fills it from the node's elements, and annotates the
+ * node with that data and with the Tensor domain. S32 and FLOAT32 are handled.
+ *
+ * @throws std::runtime_error for other data types, or when the element count of the node
+ *         does not match its shape
+ */
+void NodeExecution::execute(loco::ConstGen *constgen)
+{
+  Shape shape;
+  shape.resize(constgen->rank());
+
+  // 64-bit accumulator: the product of 32-bit dimensions may not fit into 32 bits
+  uint64_t volume = 1;
+  for (uint32_t i = 0; i < shape.rank(); ++i)
+  {
+    shape.dim(i) = constgen->dim(i).value();
+    volume *= shape.dim(i);
+  }
+
+  std::unique_ptr<NodeData> data = nullptr;
+
+  switch (constgen->dtype())
+  {
+    case loco::DataType::S32:
+      data = make_constgen_data<loco::DataType::S32, int32_t>(constgen, shape, volume);
+      break;
+    case loco::DataType::FLOAT32:
+      data = make_constgen_data<loco::DataType::FLOAT32, float>(constgen, shape, volume);
+      break;
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(data != nullptr);
+  annot_data(constgen, std::move(data));
+  annot_domain(constgen, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ConstGen.test.cpp b/compiler/locomotiv/src/Node/ConstGen.test.cpp
new file mode 100644
index 000000000..838f4c11d
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ConstGen.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_ConstGen, s32)
+{
+  // Contents of the 2x3 constant; values[r][c] is stored at flat offset r * 3 + c
+  const int32_t values[2][3] = {{0, 1, 2}, {-3, -4, -5}};
+
+  // Describe the constant on a ConstGen node
+  loco::ConstGen node;
+
+  node.dtype(loco::DataType::S32);
+  node.shape({2, 3});
+  node.size<loco::DataType::S32>(6);
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 3; ++col)
+    {
+      node.at<loco::DataType::S32>(row * 3 + col) = values[row][col];
+    }
+  }
+
+  // Execute the node
+  locomotiv::NodeExecution::get().run(&node);
+
+  // Inspect the annotated result
+  auto result = locomotiv::annot_data(&node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*result->shape(), Shape({2, 3}));
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 3; ++col)
+    {
+      ASSERT_EQ(result->as_s32_bufptr()->at(Index{row, col}), values[row][col]);
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(&node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_ConstGen, f32)
+{
+  // Contents of the 2x3 constant; values[r][c] is stored at flat offset r * 3 + c
+  const float values[2][3] = {{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}};
+
+  // Describe the constant on a ConstGen node
+  loco::ConstGen node;
+
+  node.dtype(loco::DataType::FLOAT32);
+  node.shape({2, 3});
+  node.size<loco::DataType::FLOAT32>(6);
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 3; ++col)
+    {
+      node.at<loco::DataType::FLOAT32>(row * 3 + col) = values[row][col];
+    }
+  }
+
+  // Execute the node
+  locomotiv::NodeExecution::get().run(&node);
+
+  // Inspect the annotated result
+  auto result = locomotiv::annot_data(&node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*result->shape(), Shape({2, 3}));
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 3; ++col)
+    {
+      ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{row, col}), values[row][col]);
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(&node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp
new file mode 100644
index 000000000..2e4185574
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Conv2D.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+/**
+ * @brief Compute the 1D output size of a convolution
+ *
+ * @param image_size  input extent along the axis, padding included
+ * @param filter_size filter extent along the axis
+ * @param stride      step between consecutive filter positions
+ * @return (image_size + stride - filter_size) / stride
+ * @throws std::runtime_error if stride is zero, or if filter_size is so large that the
+ *         unsigned subtraction would wrap around
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t filter_size, uint32_t stride)
+{
+  // A zero stride would divide by zero below
+  if (stride == 0)
+    throw std::runtime_error("stride must be positive");
+
+  // Without this check the subtraction wraps and yields a huge bogus output size
+  if (filter_size > image_size + stride)
+    throw std::runtime_error("filter size exceeds padded image size");
+
+  const uint32_t span = image_size + stride - filter_size;
+
+  // The filter positions are expected to tile the padded image exactly
+  assert(span % stride == 0);
+  return span / stride;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Direct (nested-loop) 2D convolution
+ * @note  input_buf and filter_buf are both indexed in NHWC order
+ *
+ * Output extents come from compute_out_size() applied to the padded input extents. Filter
+ * taps that fall outside the input image add nothing to the sum, i.e. padding acts as zero.
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input_buf,
+                          const Buffer<FIL_T> *filter_buf)
+{
+  auto ifm_shape = input_buf->shape();
+  auto ker_shape = filter_buf->shape();
+
+  locomotiv::validate(ifm_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(ker_shape.rank() == 4, "filter rank must be 4");
+  // input and filter must agree on the channel count
+  locomotiv::validate(ifm_shape.dim(3) == ker_shape.dim(3), "channel value mismatch");
+
+  // Input geometry (N, H, W, C)
+  const uint32_t n_batch = ifm_shape.dim(0);
+  const uint32_t ifm_h = ifm_shape.dim(1);
+  const uint32_t ifm_w = ifm_shape.dim(2);
+  const uint32_t ifm_c = ifm_shape.dim(3);
+
+  // Filter geometry (N, H, W, C); N gives the number of output channels
+  const uint32_t ofm_c = ker_shape.dim(0);
+  const uint32_t ker_h = ker_shape.dim(1);
+  const uint32_t ker_w = ker_shape.dim(2);
+
+  const uint32_t stride_h = conv2d->stride()->vertical();
+  const uint32_t stride_w = conv2d->stride()->horizontal();
+
+  // TODO Enable dilations. Fixed to 1 for now.
+  const uint32_t dilation_h = 1;
+  const uint32_t dilation_w = 1;
+
+  const uint32_t pad_t = conv2d->pad()->top();
+  const uint32_t pad_b = conv2d->pad()->bottom();
+  const uint32_t pad_l = conv2d->pad()->left();
+  const uint32_t pad_r = conv2d->pad()->right();
+
+  const uint32_t ofm_h = compute_out_size(ifm_h + pad_t + pad_b, ker_h, stride_h);
+  const uint32_t ofm_w = compute_out_size(ifm_w + pad_l + pad_r, ker_w, stride_w);
+
+  Shape ofm_shape{n_batch, ofm_h, ofm_w, ofm_c};
+  auto ofm_buf = make_buffer<RET_T, LexicalLayout>(ofm_shape);
+
+  for (uint32_t n = 0; n < n_batch; ++n)
+  {
+    for (uint32_t oy = 0; oy < ofm_h; ++oy)
+    {
+      // Top-left input coordinate of the window; negative inside the padding
+      const int y_origin = (oy * stride_h) - pad_t;
+
+      for (uint32_t ox = 0; ox < ofm_w; ++ox)
+      {
+        const int x_origin = (ox * stride_w) - pad_l;
+
+        for (uint32_t oc = 0; oc < ofm_c; ++oc)
+        {
+          RET_T acc = static_cast<RET_T>(0);
+
+          for (uint32_t ky = 0; ky < ker_h; ++ky)
+          {
+            const int32_t iy = y_origin + dilation_h * ky;
+            // Rows outside the input image contribute zero
+            if (iy < 0 || static_cast<uint32_t>(iy) >= ifm_h)
+              continue;
+
+            for (uint32_t kx = 0; kx < ker_w; ++kx)
+            {
+              const int32_t ix = x_origin + dilation_w * kx;
+              // Columns outside the input image contribute zero
+              if (ix < 0 || static_cast<uint32_t>(ix) >= ifm_w)
+                continue;
+
+              for (uint32_t ic = 0; ic < ifm_c; ++ic)
+              {
+                auto ifm_value = input_buf->at(
+                    Index({n, static_cast<uint32_t>(iy), static_cast<uint32_t>(ix), ic}));
+                auto ker_value = filter_buf->at(Index({oc, ky, kx, ic}));
+                acc += (ifm_value * ker_value);
+              }
+            }
+          }
+
+          ofm_buf.at(Index({n, oy, ox, oc})) = acc;
+        }
+      }
+    }
+  }
+
+  return ofm_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a Conv2D node
+ *
+ * Reads the data annotated on the ifm and ker inputs, validates presence, rank and domain,
+ * computes the convolution and annotates the node with the result and the Feature domain.
+ * FLOAT32 input with FLOAT32 kernel is the only supported combination.
+ */
+void NodeExecution::execute(loco::Conv2D *conv2d)
+{
+  auto input = annot_data(conv2d->ifm());
+  auto kernel = annot_data(conv2d->ker());
+
+  validate(input, "Can't find input data of Conv2D");
+  validate(input->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(kernel, "Can't find kernel data of Conv2D");
+  validate(kernel->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(conv2d->ifm()) == loco::Domain::Feature, "IFM of Conv2D is not feature");
+  validate(annot_domain(conv2d->ker()) == loco::Domain::Filter, "Kernel of Conv2D is not filter");
+
+  const bool all_f32 =
+      input->dtype() == loco::DataType::FLOAT32 && kernel->dtype() == loco::DataType::FLOAT32;
+  if (!all_f32)
+    throw std::runtime_error("NYI for these DataTypes");
+
+  auto ofm_buf =
+      calc_conv2D<float, float, float>(conv2d, input->as_f32_bufptr(), kernel->as_f32_bufptr());
+
+  std::unique_ptr<NodeData> result = make_data(ofm_buf);
+  assert(result != nullptr);
+
+  annot_data(conv2d, std::move(result));
+  annot_domain(conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Conv2D.test.cpp b/compiler/locomotiv/src/Node/Conv2D.test.cpp
new file mode 100644
index 000000000..83d7fc268
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Conv2D.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Execute a Conv2D node on raw float arrays and compare with the expected output
+ *
+ * The input and kernel values are annotated directly on a FeatureEncode and a FilterEncode
+ * node (only the Conv2D node itself is executed), so the arrays must already be laid out in
+ * the order given by ifm_shape / ker_shape.
+ *
+ * @param ifm          input values, ifm_shape elements in lexical order
+ * @param ker          kernel values, ker_shape elements in lexical order
+ * @param expected_ofm expected output values, ofm_shape elements in lexical order
+ * @param ifm_shape    shape of the input
+ * @param ker_shape    shape of the kernel
+ * @param ofm_shape    expected shape of the output
+ * @param stride_v     vertical stride
+ * @param stride_h     horizontal stride
+ * @param pad_top      padding above the input (default 0)
+ * @param pad_bottom   padding below the input (default 0)
+ * @param pad_left     padding left of the input (default 0)
+ * @param pad_right    padding right of the input (default 0)
+ */
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ker_shape, const Shape &ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    // make_overlay is handed a mutable pointer; the overlay is only read from here
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of FilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::FilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::Filter);
+  }
+
+  // build Conv2D
+  auto conv2d = g->nodes()->create<loco::Conv2D>();
+  conv2d->ifm(ifm_enc);
+  conv2d->ker(ker_enc);
+  conv2d->stride()->vertical(stride_v);
+  conv2d->stride()->horizontal(stride_h);
+  conv2d->pad()->top(pad_top);
+  conv2d->pad()->bottom(pad_bottom);
+  conv2d->pad()->left(pad_left);
+  conv2d->pad()->right(pad_right);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(conv2d);
+
+  // get result of calculation
+  auto conv2d_result = locomotiv::annot_data(conv2d);
+
+  // check the result
+  ASSERT_NE(conv2d_result, nullptr);
+  ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 1], stddev=1)
+ker = tf.random_normal([3, 3, 1, 1], stddev=1)
+out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+ print(sess.run(out))
+
+NOTE The snippet draws random tensors, so the literal values below are the record of one
+     particular run. The snippet declares the kernel as [3, 3, 1, 1] while this test passes
+     the same nine values as Shape{1, 3, 3, 1}; with one input and one output channel the
+     flat element order is identical either way.
+*/
+// Conv2D over a 1x5x5x1 input with a 3x3 kernel, stride 2 and no padding -> 1x2x2x1 output
+TEST(NodeExecution_Conv2D, f32_1x5x5x1_calculation)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ // 5x5 input image, one channel
+ const float ifm[] =
+ {
+ -0.48850584, 1.4292705, -1.3424522, -0.7441476, -1.8964586,
+ 1.7021934, -0.39246717, 0.6248314, 0.12724274, 1.3915083,
+ 0.382255, 0.7725081, 0.9171561, -1.1847119, 0.61858755,
+ 1.1530193, -0.476239, -0.9038663, -0.48764458, 0.339963,
+ 2.2817912, -0.8464133, -1.0598192, 0.8361126, 1.2344601
+ };
+
+ // 3x3 kernel, one input and one output channel
+ const float ker[] =
+ {
+ -0.0830195, 0.21088193, -0.11781317,
+ 0.07755677, 1.6337638, 1.0792778,
+ -1.6922939, -1.5437212, 0.96667504
+ };
+
+ // Expected 2x2 output
+ const float ofm[] =
+ {
+ -0.28752697, 2.8108592,
+ -5.220376 , 0.7973861
+ };
+
+ run_test(ifm, ker, ofm,
+ Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // shapes of input, ker, output
+ 2, 2 // stride
+ );
+}
+
+TEST(NodeExecution_Conv2D, f32_multiple_channel)
+{
+  // Covers channel != 1 with stride = [1,1]; every tensor is filled with a single constant
+  using nncc::core::ADT::tensor::Shape;
+
+  float input[1*5*5*3];
+  for (float &value : input) value = 2.2;
+
+  float kernel[2*2*2*3]; // nhwc
+  for (float &value : kernel) value = 1.1;
+
+  // Each output element sums 2*2*3 products of 2.2 * 1.1, i.e. 29.04
+  float expected[1*4*4*2];
+  for (float &value : expected) value = 29.04;
+
+  run_test(input, kernel, expected,
+           Shape{1, 5, 5, 3}, Shape{2, 2, 2, 3}, Shape{1, 4, 4, 2}, // shapes of input, ker, output
+           1, 1                                                      // stride
+  );
+}
+
+/* ifm and ofm are from the code below:
+tensorflow version : 1.12.0
+
+import tensorflow as tf
+
+ifm = tf.constant([-1.3653529, 0.4160791, 0.5059157, 0.7649683, 0.39364856,
+ -1.0164733, 1.506766, -1.1413091, 1.2766701, -0.9253511,
+ 1.3570246, 0.32089928, -0.9898171, 1.983792, -0.3423274,
+ -1.1901658, 1.2288222, -0.47401968, -0.01369802, 0.4136331,
+ 0.06960588, -0.16537654, -0.65015996, -0.555224, 0.7140603
+], shape=[1, 5, 5, 1])
+
+ker = tf.constant([2.3490515, -0.4572366, 0.05790535,
+ 0.3672005, 0.52679914, 0.74607974,
+ -1.7211207, 1.1174419, -0.59663385
+], shape=[3, 3, 1, 1])
+
+ofm = tf.nn.conv2d(ifm, ker, strides=[1, 1, 1, 1], padding='SAME')
+
+with tf.Session() as sess:
+ print(sess.run(ofm))
+
+NOTE The snippet declares the kernel as [3, 3, 1, 1] while this test passes the same nine
+     values as Shape{1, 3, 3, 1}; with one input and one output channel the flat element
+     order is identical either way.
+*/
+// Conv2D with explicit padding: 3x3 kernel, stride 1 and a one-element pad on every side,
+// so the 5x5 spatial extent of the input is preserved in the output
+TEST(NodeExecution_Conv2D, with_padding)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ // 5x5 input image, one channel
+ const float ifm[] =
+ {
+ -1.3653529, 0.4160791, 0.5059157, 0.7649683, 0.39364856,
+ -1.0164733, 1.506766, -1.1413091, 1.2766701, -0.9253511,
+ 1.3570246, 0.32089928, -0.9898171, 1.983792, -0.3423274,
+ -1.1901658, 1.2288222, -0.47401968, -0.01369802, 0.4136331,
+ 0.06960588, -0.16537654, -0.65015996, -0.555224, 0.7140603
+ };
+
+ // 3x3 kernel, one input and one output channel
+ const float ker[] =
+ {
+ 2.3490515, -0.4572366, 0.05790535,
+ 0.3672005, 0.52679914, 0.74607974,
+ -1.7211207, 1.1174419, -0.59663385
+ };
+
+ // Expected 5x5 output
+ const float ofm[] =
+ {
+ -2.443676, 4.2094254, -3.6403496, 4.8254814, -2.743059,
+ 2.5620093, -5.185688, -1.1470609, 4.54913, -2.1985974,
+ -0.5567835, 0.49045527, 2.5752437, -2.3383713, 4.455967,
+ -0.13562866, 2.9236434, 1.4019353, -3.0521483, 6.782954,
+ 0.5286269, -3.9317036, 2.285041, -1.0817666, -0.04901773
+ };
+
+ run_test(ifm, ker, ofm,
+ Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 5, 5, 1}, // shapes of input, ker, output
+ 1, 1, // stride
+ 1, 1, 1, 1 // padding
+ );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..92d5aa161
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param image_size  input extent along the axis, padding excluded
+ * @param whole_pad   Sum of front and back pad
+ * @param filter_size filter extent along the axis
+ * @param stride      step between consecutive filter positions
+ * @return (image_size + whole_pad - filter_size) / stride + 1
+ * @throws std::runtime_error if stride is zero or the filter is larger than the padded image
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+                                 uint32_t stride)
+{
+  // A zero stride would divide by zero below
+  if (stride == 0)
+    throw std::runtime_error("stride must be positive");
+
+  const uint32_t padded_size = image_size + whole_pad;
+
+  // Without this check the subtraction wraps and yields a huge bogus output size
+  if (filter_size > padded_size)
+    throw std::runtime_error("filter size exceeds padded image size");
+
+  const uint32_t span = padded_size - filter_size;
+
+  // The filter positions are expected to tile the padded image exactly
+  assert(span % stride == 0);
+  return span / stride + 1;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculates DepthwiseConv2D
+ * @note ifm_buf has NHWC and ker_buf HWCM format
+ *       (Please check locomotiv README for further information)
+ *
+ * Each input channel ch is convolved with `multiplier` (M) kernels of its own; the result for
+ * kernel nth is written to output channel ch * multiplier + nth. Kernel taps that fall
+ * outside the input image add nothing to the sum, i.e. padding acts as zero.
+ *
+ * @tparam RET_T element type of the output buffer and of the accumulator
+ * @tparam IFM_T element type of the input feature map
+ * @tparam KER_T element type of the kernel
+ */
+template <typename RET_T, typename IFM_T, typename KER_T>
+Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffer<IFM_T> *ifm_buf,
+                             const Buffer<KER_T> *ker_buf)
+{
+  auto ifm_shape = ifm_buf->shape();
+  auto ker_shape = ker_buf->shape();
+
+  locomotiv::validate(ifm_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(ker_shape.rank() == 4, "depthwise filter rank must be 4");
+  locomotiv::validate(ifm_shape.dim(3 /* of NHWC */) == ker_shape.dim(2 /* of HWCM */),
+                      "channel value mismatch"); // should have same channel values
+
+  const uint32_t ifm_height = ifm_shape.dim(1);
+  const uint32_t ifm_width = ifm_shape.dim(2);
+
+  const uint32_t ker_height = ker_shape.dim(0);
+  const uint32_t ker_width = ker_shape.dim(1);
+
+  const uint32_t stride_width = dw_conv2d->stride()->horizontal();
+  const uint32_t stride_height = dw_conv2d->stride()->vertical();
+
+  // TODO Enable dilations. Let's set these to 1 for now.
+  const uint32_t dilation_width_factor = 1;
+  const uint32_t dilation_height_factor = 1;
+
+  const uint32_t pad_top = dw_conv2d->pad()->top();
+  const uint32_t pad_bottom = dw_conv2d->pad()->bottom();
+
+  const uint32_t pad_left = dw_conv2d->pad()->left();
+  const uint32_t pad_right = dw_conv2d->pad()->right();
+
+  const uint32_t ofm_height =
+      compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+  const uint32_t ofm_width =
+      compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+
+  const uint32_t batches = ifm_shape.dim(0);
+  const uint32_t ifm_depth = ifm_shape.dim(3);
+  const uint32_t multiplier = ker_shape.dim(3);
+  const uint32_t ofm_depth = ifm_depth * multiplier;
+
+  Shape ofm_shape{batches, ofm_height, ofm_width, ofm_depth};
+  auto ofm_buf = make_buffer<RET_T, LexicalLayout>(ofm_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t ofm_y = 0; ofm_y < ofm_height; ++ofm_y)
+    {
+      // Top-left input coordinate of the window; negative inside the padding
+      const int in_y_origin = (ofm_y * stride_height) - pad_top;
+
+      for (uint32_t ofm_x = 0; ofm_x < ofm_width; ++ofm_x)
+      {
+        const int in_x_origin = (ofm_x * stride_width) - pad_left;
+
+        for (uint32_t ch = 0; ch < ifm_depth; ++ch)
+        {
+          for (uint32_t nth = 0; nth < multiplier; nth++)
+          {
+            // Accumulate in the requested result type (was hard-coded to float)
+            RET_T total = static_cast<RET_T>(0);
+
+            for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+            {
+              const int in_y = in_y_origin + dilation_height_factor * ker_y;
+              // Rows outside the input image contribute zero
+              if (in_y < 0 || static_cast<uint32_t>(in_y) >= ifm_height)
+                continue;
+
+              for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+              {
+                const int in_x = in_x_origin + dilation_width_factor * ker_x;
+                // Columns outside the input image contribute zero
+                if (in_x < 0 || static_cast<uint32_t>(in_x) >= ifm_width)
+                  continue;
+
+                auto ifm_value = ifm_buf->at(
+                    Index({batch, static_cast<uint32_t>(in_y), static_cast<uint32_t>(in_x), ch}));
+                auto ker_value = ker_buf->at(Index({ker_y, ker_x, ch, nth}));
+                total += (ifm_value * ker_value);
+              }
+            }
+
+            const uint32_t ofm_channel = ch * multiplier + nth;
+            ofm_buf.at(Index({batch, ofm_y, ofm_x, ofm_channel})) = total;
+          }
+        }
+      }
+    }
+  }
+  return ofm_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a DepthwiseConv2D node
+ *
+ * Reads the data annotated on the ifm and ker inputs, validates presence, rank and domain,
+ * computes the depthwise convolution and annotates the node with the result and the Feature
+ * domain. FLOAT32 input with FLOAT32 kernel is the only supported combination.
+ */
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+{
+  auto input = annot_data(dw_conv2d->ifm());
+  auto kernel = annot_data(dw_conv2d->ker());
+
+  validate(input, "Can't find input data of DepthwiseConv2D");
+  validate(input->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(kernel, "Can't find kernel data of DepthwiseConv2D");
+  validate(kernel->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(dw_conv2d->ifm()) == loco::Domain::Feature,
+           "IFM of DepthwiseConv2D is not feature");
+  validate(annot_domain(dw_conv2d->ker()) == loco::Domain::DepthwiseFilter,
+           "Kernel of DepthwiseConv2D is not depthwise filter");
+
+  const bool all_f32 =
+      input->dtype() == loco::DataType::FLOAT32 && kernel->dtype() == loco::DataType::FLOAT32;
+  if (!all_f32)
+    throw std::runtime_error("NYI for these DataTypes");
+
+  auto ofm_buf = calc_dw_conv2d<float, float, float>(dw_conv2d, input->as_f32_bufptr(),
+                                                     kernel->as_f32_bufptr());
+
+  std::unique_ptr<NodeData> result = make_data(ofm_buf);
+  assert(result != nullptr);
+
+  annot_data(dw_conv2d, std::move(result));
+  annot_domain(dw_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..48824c2e0
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Execute a DepthwiseConv2D node on raw float arrays and compare with the expected output
+ *
+ * The input and kernel values are annotated directly on a FeatureEncode and a
+ * DepthwiseFilterEncode node (only the DepthwiseConv2D node itself is executed), so the arrays
+ * must already be laid out in the order given by ifm_shape / ker_shape.
+ *
+ * @param ifm          input values, ifm_shape elements in lexical order
+ * @param ker          kernel values, ker_shape elements in lexical order
+ * @param expected_ofm expected output values, ofm_shape elements in lexical order
+ * @param ifm_shape    shape of the input
+ * @param ker_shape    shape of the kernel
+ * @param ofm_shape    expected shape of the output
+ * @param stride_v     vertical stride
+ * @param stride_h     horizontal stride
+ * @param pad_top      padding above the input (default 0)
+ * @param pad_bottom   padding below the input (default 0)
+ * @param pad_left     padding left of the input (default 0)
+ * @param pad_right    padding right of the input (default 0)
+ */
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ker_shape, const Shape &ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    // make_overlay is handed a mutable pointer; the overlay is only read from here
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of DepthwiseFilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::DepthwiseFilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::DepthwiseFilter);
+  }
+
+  // build DepthwiseConv2D
+  auto dw_conv2d = g->nodes()->create<loco::DepthwiseConv2D>();
+  dw_conv2d->ifm(ifm_enc);
+  dw_conv2d->ker(ker_enc);
+  dw_conv2d->stride()->vertical(stride_v);
+  dw_conv2d->stride()->horizontal(stride_h);
+  dw_conv2d->pad()->top(pad_top);
+  dw_conv2d->pad()->bottom(pad_bottom);
+  dw_conv2d->pad()->left(pad_left);
+  dw_conv2d->pad()->right(pad_right);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(dw_conv2d);
+
+  // get result of calculation
+  auto dw_conv2d_result = locomotiv::annot_data(dw_conv2d);
+
+  // check the result
+  ASSERT_NE(dw_conv2d_result, nullptr);
+  ASSERT_TRUE(dw_conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(dw_conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dw_conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+
+/* ifm, ker and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 2], stddev=1.1)
+ker = tf.random_normal([4, 4, 2, 3], stddev=1.1)
+out = tf.nn.depthwise_conv2d(ifm, ker, strides = [1, 1, 1, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+ print(sess.run(out))
+
+NOTE The snippet draws random tensors, so the literal values below are the record of one
+     particular run.
+*/
+// DepthwiseConv2D without padding: 1x5x5x2 input, 4x4 kernel with 2 channels and multiplier 3,
+// stride 1 -> 1x2x2x6 output (5 - 4 + 1 = 2 per spatial axis, 2 * 3 = 6 channels)
+TEST(NodeExecution_DepthwiseConv2D, f32_random_valid)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ // Input values, 1*5*5*2 = 50 elements
+ const float ifm[] = {0.8122538, 1.209147, 0.6903842, -0.26646265, 1.516799, -1.8540707,
+ -0.74240327, 1.7811562, -0.03699546, -0.44468504, -1.4982721, -1.1858582,
+ -0.21140318, -0.974522, 1.0000849, -1.294535, -0.6108882, 0.25827602,
+ 1.3631831, -0.5180266, 0.20870179, 0.18333802, -0.42263857, -1.6694735,
+ 0.0415236, -0.3903758, 2.0933757, -0.29660916, 2.1218338, -1.1599928,
+ 0.57163256, 0.48865932, -1.3622656, 0.35924262, 1.2951899, -0.1769997,
+ 0.74513537, -0.31920406, -1.2902768, -0.7095059, 1.9157801, -0.41028237,
+ 1.2502829, 0.3354887, 1.4199319, -0.20366786, -0.8828556, 0.5173567,
+ 1.7708117, -0.30096334};
+ // Kernel values, 4*4*2*3 = 96 elements
+ const float ker[] = {
+ -0.19805557, 0.58464956, -0.7804337, 0.06974592, 0.45790604, 0.24833807, 0.43393376,
+ 0.2541043, -0.04406675, -0.32167575, 1.0546446, -1.4978354, 0.20829494, 1.1659569,
+ 0.37908667, -0.94137955, 0.293349, -1.1023049, 0.76133233, 0.55595005, 1.4458209,
+ 1.6128604, 1.5655615, -2.183877, -0.90535915, -0.49858555, 1.7168728, -1.1590382,
+ 0.6706056, 1.2215618, -0.06603386, 0.16559464, 0.541991, -0.44488335, 0.766181,
+ 1.0227629, -0.6352362, -1.670828, -0.63334507, 0.0313305, -0.6721083, 0.50112915,
+ -0.15218066, 0.67222077, -0.3613627, -0.08516614, -0.5024078, -0.9503976, -2.1892295,
+ 1.8308185, -0.15187284, 1.5761136, 0.24869336, -1.7378871, -0.22518761, 1.0175673,
+ 0.7084485, -0.74157554, -1.8185995, -1.3330095, -0.04427439, 1.0556892, -0.68243974,
+ 0.32001218, 2.0901792, -1.1612813, 0.7294674, 0.05740008, -0.00832882, 1.0446658,
+ 0.4477195, -0.09174404, -1.0176039, 1.5066665, -2.148343, 0.29421416, 0.93011874,
+ -0.15737922, -1.6444012, 0.25780794, -0.6545867, -0.3488956, 0.26167992, -0.154414,
+ 0.2798124, -0.8590068, 2.0494444, 0.48268002, 0.81941164, -0.4848027, 0.76870304,
+ 0.7102261, 0.45778143, 0.23214905, -0.17742023, -0.75016516};
+ // Expected output values, 1*2*2*6 = 24 elements
+ const float ofm[] = {4.474646, 0.6792067, -1.9799856, 7.484751, 4.3087378, -1.905938,
+ 1.4887369, 0.4361322, 0.79539883, -3.8583446, -4.502204, 4.356392,
+ -5.3030324, 3.493003, -4.349277, 2.3069482, -3.8881323, -0.73901534,
+ -0.6629516, 2.1247253, -4.9229584, 1.6716996, -3.0208125, 1.0597891};
+
+ run_test(ifm, ker, ofm,
+ Shape{1, 5, 5, 2}, Shape{4, 4, 2, 3}, Shape{1, 2, 2, 6}, // shapes of input, ker, output
+ 1, 1 // stride
+ );
+}
+
+// TODO Add same padding test
+
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
new file mode 100644
index 000000000..17004901f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/**
+ * @brief Encode a rank-4 input tensor into a depthwise filter buffer laid out as "HWCM"
+ * @details The node's encoder derives the filter shape from the input shape and maps each
+ * filter index to the input element to copy (check locomotiv README for more).
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilterEncode *node,
+ const Buffer<T> *input_buf)
+{
+ auto encoder = node->encoder();
+
+ // Mirror the input buffer's dimensions into a loco::TensorShape for the encoder
+ loco::TensorShape input_shape;
+ input_shape.rank(input_buf->shape().rank());
+ assert(input_shape.rank() == 4); // debug-only; execute() validates the rank before calling
+ for (uint32_t i = 0; i < input_shape.rank(); ++i)
+ {
+ input_shape.dim(i) = input_buf->shape().dim(i);
+ }
+ // Ask the encoder which depthwise filter shape corresponds to this input shape
+ loco::DepthwiseFilterShape node_shape = encoder->shape(input_shape);
+
+ // Make HWCM (i.e. height, width, depth, multiplier) buffer from DepthwiseFilterShape
+ Buffer<T> node_buf = make_buffer<T, LexicalLayout>(
+ Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(),
+ node_shape.multiplier().value()});
+
+ // Visit every HWCM position of the output and fetch its value through the encoder
+ for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+ {
+ loco::DepthwiseFilterIndex index;
+ index.row() = e.current().at(0);
+ index.column() = e.current().at(1);
+ index.channel() = e.current().at(2);
+ index.nth() = e.current().at(3);
+ // encoder->value(index) gives the input tensor position that holds this filter element
+ node_buf.at(e.current()) = input_buf->at(encoder->value(index));
+ }
+
+ return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+{
+ auto input_data = annot_data(enc->input());
+ // Encodes the annotated input tensor as a depthwise filter after validating it
+ validate(input_data, "Input of DepthwiseFilterEncode not ready");
+ validate(annot_domain(enc->input()) == loco::Domain::Tensor,
+ "Input of DepthwiseFilterEncode is not Tensor");
+ validate(input_data->shape()->rank() == 4, "Input shape mismatch");
+
+ std::unique_ptr<NodeData> enc_data = nullptr;
+ // Dispatch on element type; FLOAT32 is the only case handled here
+ switch (input_data->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ auto input_buf = input_data->as_f32_bufptr();
+ enc_data = dw_filter_encode<float>(enc, input_buf);
+ break;
+ }
+ default:
+ throw std::runtime_error("NYI for this DataType");
+ }
+ // Attach the encoded data to the node and mark its domain as DepthwiseFilter
+ assert(enc_data != nullptr);
+ annot_data(enc, std::move(enc_data));
+ annot_domain(enc, loco::Domain::DepthwiseFilter);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
new file mode 100644
index 000000000..db828c08b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+TEST(NodeExecution_DepthwiseFilterEncode, f32)
+{
+ const uint32_t H = 2; // filter height
+ const uint32_t W = 3; // filter width
+ const uint32_t C = 4; // depth (channel)
+ const uint32_t M = 5; // multiplier
+ // Checks that an MHWC input tensor comes out of DepthwiseFilterEncode laid out as HWCM
+ auto g = loco::make_graph();
+
+ // Pull node acting as the FLOAT32 input
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+
+ // Make and assign "MHWC" data to pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{M, H, W, C});
+ float f = 1;
+ for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+ {
+ pull_buf.at(e.current()) = f;
+ f += 0.1f; // Gives each element a different value; the exact values do not matter
+ }
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+ // Encoder reading the input as MHWC: multiplier is axis 0, height 1, width 2, depth 3
+ auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>(
+ new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>);
+ encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0;
+ encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1;
+ encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2;
+ encoder->perm()->axis(loco::DepthwiseFilterAxis::Depth) = 3;
+
+ // DepthwiseFilterEncode node fed by the Pull node
+ auto enc = g->nodes()->create<loco::DepthwiseFilterEncode>();
+ enc->input(pull);
+ enc->encoder(std::move(encoder));
+ // Execute the encode node; its input was annotated above
+ locomotiv::NodeExecution::get().run(enc);
+ // Output must be FLOAT32, shaped HWCM, and hold the permuted input values
+ auto enc_data = locomotiv::annot_data(enc);
+ ASSERT_NE(enc_data, nullptr);
+ ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(enc_data->shape()), (Shape{H, W, C, M})); // locomotiv depthwise filter is HWCM
+ auto enc_buf = enc_data->as_f32_bufptr();
+ for (uint32_t h = 0; h < H; ++h)
+ for (uint32_t w = 0; w < W; ++w)
+ for (uint32_t c = 0; c < C; ++c)
+ for (uint32_t m = 0; m < M; ++m)
+ ASSERT_FLOAT_EQ(pull_buf.at(Index{m, h, w, c}), enc_buf->at(Index{h, w, c, m}));
+
+ ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::DepthwiseFilter);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.cpp
new file mode 100644
index 000000000..e5e2d67c7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseAdd.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseAdd *eltwise_add)
+{
+ struct Func final : public BinaryFunc
+ {
+ float apply(float lhs, float rhs) const override { return lhs + rhs; } // per-element sum
+ };
+ // `override` makes the compiler check apply() against BinaryFunc's virtual declaration
+ Func f;
+ // Executes EltwiseAdd by handing the node and the functor to eltwise_binary (defined elsewhere)
+ eltwise_binary(eltwise_add, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp
new file mode 100644
index 000000000..2899dccdd
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.add(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseAdd, f32)
+{
+ float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float y_val[] = {-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18};
+ float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ // Expect out_val[i] == x_val[i] + y_val[i]; every sum is 0 for these inputs
+ // Build the graph EltwiseAdd(Pull, Pull); both inputs are FLOAT32 with shape {1, 3, 3, 2}
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto inp_lhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_lhs->dtype(loco::DataType::FLOAT32);
+ inp_lhs->shape({1, 3, 3, 2});
+ }
+
+ auto inp_rhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_rhs->dtype(loco::DataType::FLOAT32);
+ inp_rhs->shape({1, 3, 3, 2});
+ }
+
+ auto eltwise_add = g->nodes()->create<loco::EltwiseAdd>();
+ {
+ eltwise_add->lhs(inp_lhs);
+ eltwise_add->rhs(inp_rhs);
+ }
+
+ // Fill the two input buffers from x_val / y_val in IndexEnumerator order
+ auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_lhs_buf.at(e.current()) = x_val[n++];
+ }
+ }
+
+ auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_rhs_buf.at(e.current()) = y_val[n++];
+ }
+ }
+ // Attach each buffer to its Pull node and tag the node's domain as Tensor
+ auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+ locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+ locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+ auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+ locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+ locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+ // Execute the EltwiseAdd node; its inputs were annotated above
+ locomotiv::NodeExecution::get().run(eltwise_add);
+
+ // Fetch the data annotated on the EltwiseAdd node
+ auto eltwise_add_data = locomotiv::annot_data(eltwise_add);
+
+ // Check dtype and shape, then every element against out_val, then the domain
+ ASSERT_NE(eltwise_add_data, nullptr);
+ ASSERT_EQ(eltwise_add_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(eltwise_add_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(eltwise_add_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(eltwise_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(eltwise_add), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.cpp
new file mode 100644
index 000000000..a054d9a97
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseDiv.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseDiv *eltwise_div)
+{
+ struct Func final : public BinaryFunc
+ {
+ float apply(float lhs, float rhs) const override { return lhs / rhs; } // no zero check
+ };
+ // `override` makes the compiler check apply() against BinaryFunc's virtual declaration
+ Func f;
+ // Executes EltwiseDiv by handing the node and the functor to eltwise_binary (defined elsewhere)
+ eltwise_binary(eltwise_div, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp
new file mode 100644
index 000000000..60950c15b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.div(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseDiv, f32)
+{
+ float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float out_val[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+ // Expect out_val[i] == x_val[i] / y_val[i]; every quotient is 1 for these inputs
+ // Build the graph EltwiseDiv(Pull, Pull); both inputs are FLOAT32 with shape {1, 3, 3, 2}
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto inp_lhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_lhs->dtype(loco::DataType::FLOAT32);
+ inp_lhs->shape({1, 3, 3, 2});
+ }
+
+ auto inp_rhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_rhs->dtype(loco::DataType::FLOAT32);
+ inp_rhs->shape({1, 3, 3, 2});
+ }
+
+ auto eltwise_div = g->nodes()->create<loco::EltwiseDiv>();
+ {
+ eltwise_div->lhs(inp_lhs);
+ eltwise_div->rhs(inp_rhs);
+ }
+
+ // Fill the two input buffers from x_val / y_val in IndexEnumerator order
+ auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_lhs_buf.at(e.current()) = x_val[n++];
+ }
+ }
+
+ auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_rhs_buf.at(e.current()) = y_val[n++];
+ }
+ }
+ // Attach each buffer to its Pull node and tag the node's domain as Tensor
+ auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+ locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+ locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+ auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+ locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+ locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+ // Execute the EltwiseDiv node; its inputs were annotated above
+ locomotiv::NodeExecution::get().run(eltwise_div);
+
+ // Fetch the data annotated on the EltwiseDiv node
+ auto eltwise_div_data = locomotiv::annot_data(eltwise_div);
+
+ // Check dtype and shape, then every element against out_val, then the domain
+ ASSERT_NE(eltwise_div_data, nullptr);
+ ASSERT_EQ(eltwise_div_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(eltwise_div_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(eltwise_div_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(eltwise_div_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(eltwise_div), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseMax.cpp b/compiler/locomotiv/src/Node/EltwiseMax.cpp
new file mode 100644
index 000000000..ec44fd6fa
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+#include <algorithm>
+#include <cstdlib>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseMax *eltwise_max)
+{
+ struct Func final : public BinaryFunc
+ {
+ float apply(float lhs, float rhs) const override { return std::max(lhs, rhs); }
+ };
+ // `override` makes the compiler check apply() against BinaryFunc's virtual declaration
+ Func f;
+ // Executes EltwiseMax by handing the node and the functor to eltwise_binary (defined elsewhere)
+ eltwise_binary(eltwise_max, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseMax.test.cpp b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp
new file mode 100644
index 000000000..c64db8994
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.maximum(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseMax, f32)
+{
+ float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float y_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+ float out_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ // Expect out_val[i] == max(x_val[i], y_val[i])
+ // Build the graph EltwiseMax(Pull, Pull); both inputs are FLOAT32 with shape {1, 3, 3, 2}
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto inp_lhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_lhs->dtype(loco::DataType::FLOAT32);
+ inp_lhs->shape({1, 3, 3, 2});
+ }
+
+ auto inp_rhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_rhs->dtype(loco::DataType::FLOAT32);
+ inp_rhs->shape({1, 3, 3, 2});
+ }
+
+ auto eltwise_max = g->nodes()->create<loco::EltwiseMax>();
+ {
+ eltwise_max->lhs(inp_lhs);
+ eltwise_max->rhs(inp_rhs);
+ }
+
+ // Fill the two input buffers from x_val / y_val in IndexEnumerator order
+ auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_lhs_buf.at(e.current()) = x_val[n++];
+ }
+ }
+
+ auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_rhs_buf.at(e.current()) = y_val[n++];
+ }
+ }
+ // Attach each buffer to its Pull node and tag the node's domain as Tensor
+ auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+ locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+ locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+ auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+ locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+ locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+ // Execute the EltwiseMax node; its inputs were annotated above
+ locomotiv::NodeExecution::get().run(eltwise_max);
+
+ // Fetch the data annotated on the EltwiseMax node
+ auto eltwise_max_data = locomotiv::annot_data(eltwise_max);
+
+ // Check dtype and shape, then every element against out_val, then the domain
+ ASSERT_NE(eltwise_max_data, nullptr);
+ ASSERT_EQ(eltwise_max_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(eltwise_max_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(eltwise_max_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(eltwise_max_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(eltwise_max), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseMul.cpp b/compiler/locomotiv/src/Node/EltwiseMul.cpp
new file mode 100644
index 000000000..6720ab92f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMul.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseMul *eltwise_mul)
+{
+ struct Func final : public BinaryFunc
+ {
+ float apply(float lhs, float rhs) const override { return lhs * rhs; } // per-element product
+ };
+ // `override` makes the compiler check apply() against BinaryFunc's virtual declaration
+ Func f;
+ // Executes EltwiseMul by handing the node and the functor to eltwise_binary (defined elsewhere)
+ eltwise_binary(eltwise_mul, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseMul.test.cpp b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp
new file mode 100644
index 000000000..b76888300
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], shape=[1, 3, 3, 2],
+ dtype=tf.float32)
+out = tf.math.multiply(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseMul, f32)
+{
+ float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ float y_val[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
+ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
+ float out_val[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+ 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8};
+ // Expect out_val[i] == x_val[i] * y_val[i], compared below with ASSERT_FLOAT_EQ
+ // Build the graph EltwiseMul(Pull, Pull); both inputs are FLOAT32 with shape {1, 3, 3, 2}
+ auto g = loco::make_graph();
+ Shape input_shape{1, 3, 3, 2}; // NHWC
+
+ auto inp_lhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_lhs->dtype(loco::DataType::FLOAT32);
+ inp_lhs->shape({1, 3, 3, 2});
+ }
+
+ auto inp_rhs = g->nodes()->create<loco::Pull>();
+ {
+ inp_rhs->dtype(loco::DataType::FLOAT32);
+ inp_rhs->shape({1, 3, 3, 2});
+ }
+
+ auto eltwise_mul = g->nodes()->create<loco::EltwiseMul>();
+ {
+ eltwise_mul->lhs(inp_lhs);
+ eltwise_mul->rhs(inp_rhs);
+ }
+
+ // Fill the two input buffers from x_val / y_val in IndexEnumerator order
+ auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_lhs_buf.at(e.current()) = x_val[n++];
+ }
+ }
+
+ auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape);
+ {
+ int n = 0;
+ for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance())
+ {
+ inp_rhs_buf.at(e.current()) = y_val[n++];
+ }
+ }
+ // Attach each buffer to its Pull node and tag the node's domain as Tensor
+ auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf);
+ locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data));
+ locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor);
+
+ auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf);
+ locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data));
+ locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor);
+
+ // Execute the EltwiseMul node; its inputs were annotated above
+ locomotiv::NodeExecution::get().run(eltwise_mul);
+
+ // Fetch the data annotated on the EltwiseMul node
+ auto eltwise_mul_data = locomotiv::annot_data(eltwise_mul);
+
+ // Check dtype and shape, then every element against out_val, then the domain
+ ASSERT_NE(eltwise_mul_data, nullptr);
+ ASSERT_EQ(eltwise_mul_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(eltwise_mul_data->shape()), Shape({1, 3, 3, 2}));
+
+ uint32_t n = 0;
+ for (IndexEnumerator e{*(eltwise_mul_data->shape())}; e.valid(); e.advance())
+ {
+ ASSERT_FLOAT_EQ(eltwise_mul_data->as_f32_bufptr()->at(e.current()), out_val[n++]);
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(eltwise_mul), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp
new file mode 100644
index 000000000..b4625a757
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include <cmath>
+
+namespace
+{
+
+inline float sqrt_ew(float val) { return std::sqrt(val); } // float overload; NaN for val < 0
+
+struct Func final : public locomotiv::UnaryFunc // unary functor used by EltwiseSqrt execution
+{
+ float apply(float v) const final { return sqrt_ew(v); } // square root of a single element
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseSqrt *sqrt_node)
+{
+ Func f;
+ // Executes EltwiseSqrt by handing the node and the functor to eltwise_unary (defined elsewhere)
+ eltwise_unary(sqrt_node, f);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp
new file mode 100644
index 000000000..adb1b853e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <limits>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_EltwiseSqrt, f32)
+{
+ // Make Pull-EltwiseSqrt graph
+ auto g = loco::make_graph();
+ auto pull = g->nodes()->create<loco::Pull>();
+ pull->dtype(loco::DataType::FLOAT32);
+ pull->shape({4});
+ auto sqrt = g->nodes()->create<loco::EltwiseSqrt>();
+ sqrt->input(pull);
+
+ // Make and assign data to Pull node
+ auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4});
+ pull_buf.at(Index{0}) = 4.0f;
+ pull_buf.at(Index{1}) = 9.0f;
+ pull_buf.at(Index{2}) = 0.0f;
+ pull_buf.at(Index{3}) = -1.0f; // negative input; the result is checked for NaN below
+ auto pull_data = locomotiv::make_data(pull_buf);
+ locomotiv::annot_data(pull, std::move(pull_data));
+ locomotiv::annot_domain(pull, loco::Domain::Tensor);
+ // Execute the EltwiseSqrt node; its input was annotated above
+ locomotiv::NodeExecution::get().run(sqrt);
+ // Check dtype, shape, each element, and finally the domain of the result
+ auto sqrt_data = locomotiv::annot_data(sqrt);
+ ASSERT_NE(sqrt_data, nullptr);
+ ASSERT_EQ(sqrt_data->dtype(), loco::DataType::FLOAT32);
+ ASSERT_EQ(*(sqrt_data->shape()), Shape{4});
+ ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{0}), 2.0f);
+ ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{1}), 3.0f);
+ ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{2}), 0.0f);
+ ASSERT_TRUE(std::isnan(sqrt_data->as_f32_bufptr()->at(Index{3}))); // sqrt(-1) must be NaN
+
+ ASSERT_EQ(locomotiv::annot_domain(sqrt), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/EltwiseSub.cpp b/compiler/locomotiv/src/Node/EltwiseSub.cpp
new file mode 100644
index 000000000..7943f950b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSub.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::EltwiseSub *eltwise_sub)
+{
+  // Functor passed to eltwise_binary: computes lhs - rhs for one pair of floats
+  struct Subtract final : public BinaryFunc
+  {
+    float apply(float minuend, float subtrahend) const
+    {
+      return minuend - subtrahend;
+    }
+  };
+
+  Subtract subtract;
+
+  eltwise_binary(eltwise_sub, subtract);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/EltwiseSub.test.cpp b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp
new file mode 100644
index 000000000..7eff90f9e
--- /dev/null
+++ b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/*
+test case generated from the following:
+
+x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
+ shape=[1, 3, 3, 2], dtype=tf.float32)
+out = tf.math.subtract(x, y)
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+TEST(NodeExecution_EltwiseSub, f32)
+{
+  // Both operands hold the same values, so the expected difference is all zeros
+  float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  auto graph = loco::make_graph();
+  Shape nhwc_shape{1, 3, 3, 2}; // NHWC
+
+  // Graph under test: EltwiseSub(Pull, Pull)
+  auto lhs_pull = graph->nodes()->create<loco::Pull>();
+  lhs_pull->dtype(loco::DataType::FLOAT32);
+  lhs_pull->shape({1, 3, 3, 2});
+
+  auto rhs_pull = graph->nodes()->create<loco::Pull>();
+  rhs_pull->dtype(loco::DataType::FLOAT32);
+  rhs_pull->shape({1, 3, 3, 2});
+
+  auto sub_node = graph->nodes()->create<loco::EltwiseSub>();
+  sub_node->lhs(lhs_pull);
+  sub_node->rhs(rhs_pull);
+
+  // Left operand: fill the buffer in enumeration order
+  auto lhs_buf = make_buffer<float, LexicalLayout>(nhwc_shape);
+  {
+    int pos = 0;
+    IndexEnumerator it{lhs_buf.shape()};
+    while (it.valid())
+    {
+      lhs_buf.at(it.current()) = x_val[pos];
+      ++pos;
+      it.advance();
+    }
+  }
+
+  // Right operand: same procedure with the second value table
+  auto rhs_buf = make_buffer<float, LexicalLayout>(nhwc_shape);
+  {
+    int pos = 0;
+    IndexEnumerator it{rhs_buf.shape()};
+    while (it.valid())
+    {
+      rhs_buf.at(it.current()) = y_val[pos];
+      ++pos;
+      it.advance();
+    }
+  }
+
+  // Annotate both Pull nodes with their data and domain
+  locomotiv::annot_data(lhs_pull, locomotiv::make_data(lhs_buf));
+  locomotiv::annot_domain(lhs_pull, loco::Domain::Tensor);
+
+  locomotiv::annot_data(rhs_pull, locomotiv::make_data(rhs_buf));
+  locomotiv::annot_domain(rhs_pull, loco::Domain::Tensor);
+
+  // Execute the node under test
+  locomotiv::NodeExecution::get().run(sub_node);
+
+  // Check dtype, shape, every element, and the resulting domain
+  auto result = locomotiv::annot_data(sub_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape({1, 3, 3, 2}));
+
+  auto result_buf = result->as_f32_bufptr();
+  uint32_t pos = 0;
+  for (IndexEnumerator it{*(result->shape())}; it.valid(); it.advance())
+  {
+    ASSERT_FLOAT_EQ(result_buf->at(it.current()), out_val[pos]);
+    ++pos;
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(sub_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/FeatureCodec.test.cpp b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
new file mode 100644
index 000000000..c35f0e69a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Buffer;
+
+// This file is intended to test FeatureEncode and FeatureDecode at once
+namespace
+{
+
+class NodeExecution_FeatureCodec : public ::testing::Test
+{
+private:
+  // Graph in which the helpers below create their nodes
+  loco::Graph g;
+
+protected:
+  /// @brief Create a Pull node of given data type, annotated with data made from given buffer
+  template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype)
+  {
+    auto node = g.nodes()->create<loco::Pull>();
+    node->dtype(dtype);
+
+    locomotiv::annot_data(node, locomotiv::make_data(pull_buf));
+    locomotiv::annot_domain(node, loco::Domain::Tensor);
+
+    return node;
+  }
+
+  /// @brief Create a FeatureEncode node reading given input through a permuting encoder
+  loco::FeatureEncode *feature_encode_layer(loco::Node *input,
+                                            const loco::Permutation<loco::Domain::Feature> &perm)
+  {
+    using Encoder = loco::PermutingEncoder<loco::Domain::Feature>;
+
+    std::unique_ptr<Encoder> permuting_encoder{new Encoder};
+    permuting_encoder->perm(perm);
+
+    auto node = g.nodes()->create<loco::FeatureEncode>();
+    node->input(input);
+    node->encoder(std::move(permuting_encoder));
+
+    return node;
+  }
+
+  /// @brief Create a FeatureDecode node reading given input through a permuting decoder
+  loco::FeatureDecode *feature_decode_layer(loco::Node *input,
+                                            const loco::Permutation<loco::Domain::Feature> &perm)
+  {
+    using Decoder = loco::PermutingDecoder<loco::Domain::Feature>;
+
+    std::unique_ptr<Decoder> permuting_decoder{new Decoder};
+    permuting_decoder->perm(perm);
+
+    auto node = g.nodes()->create<loco::FeatureDecode>();
+    node->input(input);
+    node->decoder(std::move(permuting_decoder));
+
+    return node;
+  }
+};
+
+} // namespace
+
+TEST_F(NodeExecution_FeatureCodec, s32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  // Source tensor laid out as "NCHW"; the values themselves are arbitrary
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W});
+  int32_t next_value = 0;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = next_value++;
+  }
+
+  // Permutation describing the NCHW layout, shared by encoder and decoder
+  loco::Permutation<loco::Domain::Feature> NCHW;
+
+  NCHW.axis(loco::FeatureAxis::Count) = 0;
+  NCHW.axis(loco::FeatureAxis::Depth) = 1;
+  NCHW.axis(loco::FeatureAxis::Height) = 2;
+  NCHW.axis(loco::FeatureAxis::Width) = 3;
+
+  auto pull = pull_layer(pull_buf, loco::DataType::S32);
+
+  // --- FeatureEncode ---
+  auto enc = feature_encode_layer(pull, NCHW);
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC
+
+  // Every NCHW source element must appear at the matching NHWC position
+  auto enc_buf = enc_data->as_s32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index src{n, c, h, w};
+          const Index dst{n, h, w, c};
+          ASSERT_EQ(pull_buf.at(src), enc_buf->at(dst));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature);
+
+  // --- FeatureDecode ---
+  auto dec = feature_decode_layer(enc, NCHW);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Encode followed by Decode with the same permutation must be the identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{N, C, H, W}));
+
+  auto dec_buf = dec_data->as_s32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index pos{n, c, h, w};
+          ASSERT_EQ(pull_buf.at(pos), dec_buf->at(pos));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
+
+TEST_F(NodeExecution_FeatureCodec, f32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  // Source tensor in an unusual "CHNW" layout; the values themselves are arbitrary
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W});
+  float next_value = 0.0f;
+  for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance())
+  {
+    pull_buf.at(e.current()) = next_value;
+    next_value += 0.1f;
+  }
+
+  // Permutation describing the CHNW layout, shared by encoder and decoder
+  loco::Permutation<loco::Domain::Feature> CHNW;
+
+  CHNW.axis(loco::FeatureAxis::Depth) = 0;
+  CHNW.axis(loco::FeatureAxis::Height) = 1;
+  CHNW.axis(loco::FeatureAxis::Count) = 2;
+  CHNW.axis(loco::FeatureAxis::Width) = 3;
+
+  auto pull = pull_layer(pull_buf, loco::DataType::FLOAT32);
+
+  // --- FeatureEncode ---
+  auto enc = feature_encode_layer(pull, CHNW);
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC
+
+  // Every CHNW source element must appear at the matching NHWC position
+  auto enc_buf = enc_data->as_f32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index src{c, h, n, w};
+          const Index dst{n, h, w, c};
+          ASSERT_FLOAT_EQ(pull_buf.at(src), enc_buf->at(dst));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature);
+
+  // --- FeatureDecode ---
+  auto dec = feature_decode_layer(enc, CHNW);
+  locomotiv::NodeExecution::get().run(dec);
+
+  // Encode followed by Decode with the same permutation must be the identity
+  auto dec_data = locomotiv::annot_data(dec);
+  ASSERT_NE(dec_data, nullptr);
+  ASSERT_EQ(dec_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(dec_data->shape()), (Shape{C, H, N, W}));
+
+  auto dec_buf = dec_data->as_f32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index pos{c, h, n, w};
+          ASSERT_FLOAT_EQ(pull_buf.at(pos), dec_buf->at(pos));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp
new file mode 100644
index 000000000..8a56a56b2
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Index;
+
+/**
+ * @brief Decode a feature buffer into a tensor buffer using the decoder of the given node
+ *
+ * @param node       FeatureDecode node whose decoder supplies the output shape and the
+ *                   tensor-index to feature-index mapping
+ * @param input_buf  Rank-4 input buffer; its axes are read as count, height, width, depth
+ * @return NodeData made from the decoded tensor buffer
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *node,
+                                                    const Buffer<T> *input_buf)
+{
+  auto decoder = node->decoder();
+
+  // Make FeatureShape from input. Note that feature in locomotiv represented as NHWC
+  loco::FeatureShape input_shape;
+  assert(input_buf->shape().rank() == 4);
+  input_shape.count() = input_buf->shape().dim(0);
+  input_shape.height() = input_buf->shape().dim(1);
+  input_shape.width() = input_buf->shape().dim(2);
+  input_shape.depth() = input_buf->shape().dim(3);
+
+  loco::TensorShape node_shape = decoder->shape(input_shape);
+
+  // The output buffer below is built from exactly four dimensions, so a decoder reporting
+  // any other rank must be caught here instead of having its axes dropped or misread
+  assert(node_shape.rank() == 4);
+
+  // Make tensor buffer from TensorShape
+  Buffer<T> node_buf =
+      make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(),
+                                          node_shape.dim(2).value(), node_shape.dim(3).value()});
+
+  // Copy buffer in an order arranged by decoder: each output tensor index is mapped to the
+  // feature index it originates from, which is then read from the NHWC input buffer
+  for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance())
+  {
+    loco::FeatureIndex feature_index = decoder->value(e.current());
+    Index buf_index({feature_index.batch(), feature_index.row(), feature_index.column(),
+                     feature_index.channel()});
+
+    node_buf.at(e.current()) = input_buf->at(buf_index);
+  }
+
+  return locomotiv::make_data(node_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureDecode *dec)
+{
+  auto src_node = dec->input();
+  auto src_data = annot_data(src_node);
+
+  // Input must already carry data, be in the Feature domain, and have rank 4
+  validate(src_data, "Input of FeatureDecode not ready");
+  validate(annot_domain(src_node) == loco::Domain::Feature,
+           "Input of FeatureDecode is not Feature");
+  validate(src_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> decoded = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are handled
+  const auto dtype = src_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    decoded = feature_decode<int32_t>(dec, src_data->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    decoded = feature_decode<float>(dec, src_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  // Annotate the node with the decoded data; its domain becomes Tensor
+  assert(decoded != nullptr);
+  annot_data(dec, std::move(decoded));
+  annot_domain(dec, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FeatureEncode.cpp b/compiler/locomotiv/src/Node/FeatureEncode.cpp
new file mode 100644
index 000000000..406de76ff
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FeatureEncode.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/// @brief Encode a rank-4 tensor buffer into an NHWC feature buffer using the node's encoder
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> feature_encode(const loco::FeatureEncode *node,
+                                                    const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Mirror the shape of the input buffer into a loco::TensorShape
+  const auto &buf_shape = input_buf->shape();
+  loco::TensorShape tensor_shape;
+  tensor_shape.rank(buf_shape.rank());
+  assert(tensor_shape.rank() == 4);
+  for (uint32_t axis = 0; axis < tensor_shape.rank(); ++axis)
+  {
+    tensor_shape.dim(axis) = buf_shape.dim(axis);
+  }
+
+  loco::FeatureShape feature_shape = encoder->shape(tensor_shape);
+
+  // Output buffer is NHWC: count, height, width, depth
+  Shape nhwc{feature_shape.count().value(), feature_shape.height().value(),
+             feature_shape.width().value(), feature_shape.depth().value()};
+  auto feature_buf = make_buffer<T, LexicalLayout>(nhwc);
+
+  // For every output position, ask the encoder which input element belongs there
+  for (IndexEnumerator e{feature_buf.shape()}; e.valid(); e.advance())
+  {
+    const auto &dst = e.current();
+
+    loco::FeatureIndex src;
+    src.batch() = dst.at(0);
+    src.row() = dst.at(1);
+    src.column() = dst.at(2);
+    src.channel() = dst.at(3);
+
+    feature_buf.at(dst) = input_buf->at(encoder->value(src));
+  }
+
+  return locomotiv::make_data(feature_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureEncode *enc)
+{
+  auto src_node = enc->input();
+  auto src_data = annot_data(src_node);
+
+  // Input must already carry data, be in the Tensor domain, and have rank 4
+  validate(src_data, "Input of FeatureEncode not ready");
+  validate(annot_domain(src_node) == loco::Domain::Tensor,
+           "Input of FeatureEncode is not Tensor");
+  validate(src_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> encoded = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are handled
+  const auto dtype = src_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    encoded = feature_encode<int32_t>(enc, src_data->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    encoded = feature_encode<float>(enc, src_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  // Annotate the node with the encoded data; its domain becomes Feature
+  assert(encoded != nullptr);
+  annot_data(enc, std::move(encoded));
+  annot_domain(enc, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp
new file mode 100644
index 000000000..cd9d708dc
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FilterEncode.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/// @brief Encode a rank-4 tensor buffer into an NHWC filter buffer using the node's encoder
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *node,
+                                                   const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Mirror the shape of the input buffer into a loco::TensorShape
+  const auto &buf_shape = input_buf->shape();
+  loco::TensorShape tensor_shape;
+  tensor_shape.rank(buf_shape.rank());
+  assert(tensor_shape.rank() == 4);
+  for (uint32_t axis = 0; axis < tensor_shape.rank(); ++axis)
+  {
+    tensor_shape.dim(axis) = buf_shape.dim(axis);
+  }
+
+  loco::FilterShape filter_shape = encoder->shape(tensor_shape);
+
+  // Output buffer is NHWC: count, height, width, depth
+  Shape nhwc{filter_shape.count().value(), filter_shape.height().value(),
+             filter_shape.width().value(), filter_shape.depth().value()};
+  auto filter_buf = make_buffer<T, LexicalLayout>(nhwc);
+
+  // For every output position, ask the encoder which input element belongs there
+  for (IndexEnumerator e{filter_buf.shape()}; e.valid(); e.advance())
+  {
+    const auto &dst = e.current();
+
+    loco::FilterIndex src;
+    src.nth() = dst.at(0);
+    src.row() = dst.at(1);
+    src.column() = dst.at(2);
+    src.channel() = dst.at(3);
+
+    filter_buf.at(dst) = input_buf->at(encoder->value(src));
+  }
+
+  return locomotiv::make_data(filter_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FilterEncode *enc)
+{
+  auto src_node = enc->input();
+  auto src_data = annot_data(src_node);
+
+  // Input must already carry data, be in the Tensor domain, and have rank 4
+  validate(src_data, "Input of FilterEncode not ready");
+  validate(annot_domain(src_node) == loco::Domain::Tensor,
+           "Input of FilterEncode is not Tensor");
+  validate(src_data->shape()->rank() == 4, "Input shape mismatch");
+
+  std::unique_ptr<NodeData> encoded = nullptr;
+
+  // Dispatch on element type; only S32 and FLOAT32 are handled
+  const auto dtype = src_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    encoded = filter_encode<int32_t>(enc, src_data->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    encoded = filter_encode<float>(enc, src_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  // Annotate the node with the encoded data; its domain becomes Filter
+  assert(encoded != nullptr);
+  annot_data(enc, std::move(encoded));
+  annot_domain(enc, loco::Domain::Filter);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.test.cpp b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
new file mode 100644
index 000000000..79b8308e2
--- /dev/null
+++ b/compiler/locomotiv/src/Node/FilterEncode.test.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+TEST(NodeExecution_FilterEncode, s32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  auto graph = loco::make_graph();
+
+  // Source node
+  auto source = graph->nodes()->create<loco::Pull>();
+  source->dtype(loco::DataType::S32);
+
+  // Source tensor laid out as "NCHW"; the values themselves are arbitrary
+  auto source_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W});
+  int32_t next_value = 1;
+  for (IndexEnumerator e{source_buf.shape()}; e.valid(); e.advance())
+  {
+    source_buf.at(e.current()) = next_value++;
+  }
+  locomotiv::annot_data(source, locomotiv::make_data(source_buf));
+  locomotiv::annot_domain(source, loco::Domain::Tensor);
+
+  // Permuting encoder configured to read the input tensor as NCHW
+  using Encoder = loco::PermutingEncoder<loco::Domain::Filter>;
+  std::unique_ptr<Encoder> encoder{new Encoder};
+  encoder->perm()->axis(loco::FilterAxis::Count) = 0;
+  encoder->perm()->axis(loco::FilterAxis::Depth) = 1;
+  encoder->perm()->axis(loco::FilterAxis::Height) = 2;
+  encoder->perm()->axis(loco::FilterAxis::Width) = 3;
+
+  // Node under test
+  auto enc = graph->nodes()->create<loco::FilterEncode>();
+  enc->input(source);
+  enc->encoder(std::move(encoder));
+
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC
+
+  // Every NCHW source element must appear at the matching NHWC position
+  auto enc_buf = enc_data->as_s32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index src{n, c, h, w};
+          const Index dst{n, h, w, c};
+          ASSERT_EQ(source_buf.at(src), enc_buf->at(dst));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter);
+}
+
+TEST(NodeExecution_FilterEncode, f32)
+{
+  const uint32_t N = 2;
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const uint32_t C = 5;
+
+  auto graph = loco::make_graph();
+
+  // Source node
+  auto source = graph->nodes()->create<loco::Pull>();
+  source->dtype(loco::DataType::FLOAT32);
+
+  // Source tensor in an unusual "CHNW" layout; the values themselves are arbitrary
+  auto source_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W});
+  float next_value = 1;
+  for (IndexEnumerator e{source_buf.shape()}; e.valid(); e.advance())
+  {
+    source_buf.at(e.current()) = next_value;
+    next_value += 0.1f;
+  }
+  locomotiv::annot_data(source, locomotiv::make_data(source_buf));
+  locomotiv::annot_domain(source, loco::Domain::Tensor);
+
+  // Permuting encoder configured to read the input tensor as CHNW
+  using Encoder = loco::PermutingEncoder<loco::Domain::Filter>;
+  std::unique_ptr<Encoder> encoder{new Encoder};
+  encoder->perm()->axis(loco::FilterAxis::Depth) = 0;
+  encoder->perm()->axis(loco::FilterAxis::Height) = 1;
+  encoder->perm()->axis(loco::FilterAxis::Count) = 2;
+  encoder->perm()->axis(loco::FilterAxis::Width) = 3;
+
+  // Node under test
+  auto enc = graph->nodes()->create<loco::FilterEncode>();
+  enc->input(source);
+  enc->encoder(std::move(encoder));
+
+  locomotiv::NodeExecution::get().run(enc);
+
+  auto enc_data = locomotiv::annot_data(enc);
+  ASSERT_NE(enc_data, nullptr);
+  ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC
+
+  // Every CHNW source element must appear at the matching NHWC position
+  auto enc_buf = enc_data->as_f32_bufptr();
+  for (uint32_t n = 0; n < N; ++n)
+  {
+    for (uint32_t h = 0; h < H; ++h)
+    {
+      for (uint32_t w = 0; w < W; ++w)
+      {
+        for (uint32_t c = 0; c < C; ++c)
+        {
+          const Index src{c, h, n, w};
+          const Index dst{n, h, w, c};
+          ASSERT_FLOAT_EQ(source_buf.at(src), enc_buf->at(dst));
+        }
+      }
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter);
+}
diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp
new file mode 100644
index 000000000..eb7d44a59
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Forward.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Forward *forward)
+{
+  auto src_node = forward->input();
+  auto src_data = annot_data(src_node);
+
+  // Input must already carry data and have a known domain
+  validate(src_data, "Input not ready");
+  validate(annot_domain(src_node) != loco::Domain::Unknown,
+           "Input domain must not Unknown");
+
+  std::unique_ptr<NodeData> forwarded = nullptr;
+
+  // Build new data from the input buffer; only S32 and FLOAT32 are handled
+  const auto dtype = src_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    forwarded = make_data(*(src_data->as_s32_bufptr()));
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    forwarded = make_data(*(src_data->as_f32_bufptr()));
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  // The node takes over the domain of its input unchanged
+  assert(forwarded != nullptr);
+  annot_data(forward, std::move(forwarded));
+  annot_domain(forward, annot_domain(src_node));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Forward.test.cpp b/compiler/locomotiv/src/Node/Forward.test.cpp
new file mode 100644
index 000000000..73d37139a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Forward.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// Forward over an S32 tensor must reproduce dtype, shape, value and domain of its input
+TEST(NodeExecution_Forward, s32)
+{
+  // Build the graph: Pull -> Forward
+  auto graph = loco::make_graph();
+  auto pull_node = graph->nodes()->create<loco::Pull>();
+  pull_node->dtype(loco::DataType::S32);
+  pull_node->shape({1});
+  auto forward_node = graph->nodes()->create<loco::Forward>();
+  forward_node->input(pull_node);
+
+  // Annotate the Pull node with a single-element buffer and the Tensor domain
+  auto src_buf = make_buffer<int32_t, LexicalLayout>(Shape{1});
+  src_buf.at(Index{0}) = 42;
+  locomotiv::annot_data(pull_node, locomotiv::make_data(src_buf));
+  locomotiv::annot_domain(pull_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(forward_node);
+
+  // Inspect what the execution annotated on the Forward node
+  auto result = locomotiv::annot_data(forward_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(result->shape()), Shape{1});
+  ASSERT_EQ(result->as_s32_bufptr()->at(Index{0}), src_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(forward_node), loco::Domain::Tensor);
+}
+
+// Forward over a FLOAT32 tensor must reproduce dtype, shape, value and domain of its input
+TEST(NodeExecution_Forward, f32)
+{
+  // Build the graph: Pull -> Forward
+  auto graph = loco::make_graph();
+  auto pull_node = graph->nodes()->create<loco::Pull>();
+  pull_node->dtype(loco::DataType::FLOAT32);
+  pull_node->shape({1});
+  auto forward_node = graph->nodes()->create<loco::Forward>();
+  forward_node->input(pull_node);
+
+  // Annotate the Pull node with a single-element buffer and the Tensor domain
+  auto src_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  src_buf.at(Index{0}) = 3.14f;
+  locomotiv::annot_data(pull_node, locomotiv::make_data(src_buf));
+  locomotiv::annot_domain(pull_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(forward_node);
+
+  // Inspect what the execution annotated on the Forward node
+  auto result = locomotiv::annot_data(forward_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape{1});
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0}), src_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(forward_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp
new file mode 100644
index 000000000..77b7315a9
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatMul.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculate Matrix Multiplication
+ *
+ * Multiplies a rank-2 lhs buffer by a rank-2 rhs buffer and returns a newly made
+ * buffer of shape {lhs.dim(0), rhs.dim(1)}.
+ *
+ * @note The rank and "lhs width == rhs height" requirements are checked with
+ *       assert() only.
+ */
+template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buffer<T> *rhs_buf)
+{
+  const auto lhs_shape = lhs_buf->shape();
+  const auto rhs_shape = rhs_buf->shape();
+
+  assert(lhs_shape.rank() == 2 && "lhs rank must be 2");
+  assert(rhs_shape.rank() == 2 && "rhs rank must be 2");
+  // The contracted axis must have the same extent on both operands
+  assert(lhs_shape.dim(1) == rhs_shape.dim(0) && "height/width mismatch");
+
+  const uint32_t rows = lhs_shape.dim(0);  // output height
+  const uint32_t inner = lhs_shape.dim(1); // extent of the contracted axis
+  const uint32_t cols = rhs_shape.dim(1);  // output width
+
+  auto product = make_buffer<T, LexicalLayout>(Shape{rows, cols});
+
+  for (uint32_t row = 0; row < rows; ++row)
+  {
+    for (uint32_t col = 0; col < cols; ++col)
+    {
+      // Dot product of lhs row and rhs column, accumulated in ascending index order
+      T acc = static_cast<T>(0);
+      for (uint32_t k = 0; k < inner; ++k)
+      {
+        acc += lhs_buf->at(Index({row, k})) * rhs_buf->at(Index({k, col}));
+      }
+      product.at(Index({row, col})) = acc;
+    }
+  }
+
+  return product;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a MatMul node
+ *
+ * Requires both operands to carry rank-2 data annotated with the Matrix domain
+ * and to agree on the contracted axis (lhs width == rhs height). FLOAT32 x FLOAT32
+ * and S32 x S32 are handled; any other data type combination raises
+ * std::runtime_error. The product is annotated on the node with the Matrix domain.
+ */
+void NodeExecution::execute(loco::MatMul *mat_mul)
+{
+  auto lhs_data = annot_data(mat_mul->lhs());
+  auto rhs_data = annot_data(mat_mul->rhs());
+
+  validate(lhs_data, "Can't find left matrix data of MatMul");
+  validate(lhs_data->shape()->rank() == 2, "lhs rank must be 2");
+
+  validate(rhs_data, "Can't find right matrix data of MatMul");
+  validate(rhs_data->shape()->rank() == 2, "rhs rank must be 2");
+
+  // calc_mat_mul checks this with assert() only, which is compiled out under NDEBUG.
+  // Validate it here so that mismatched operands never reach the multiplication loop.
+  validate(lhs_data->shape()->dim(1) == rhs_data->shape()->dim(0),
+           "lhs width must be equal to rhs height");
+
+  validate(annot_domain(mat_mul->lhs()) == loco::Domain::Matrix,
+           "Left matrix of MatMul is not a Matrix");
+  validate(annot_domain(mat_mul->rhs()) == loco::Domain::Matrix,
+           "Right matrix of MatMul is not a Matrix");
+
+  std::unique_ptr<NodeData> mat_mul_result = nullptr;
+
+  if (lhs_data->dtype() == loco::DataType::FLOAT32 && rhs_data->dtype() == loco::DataType::FLOAT32)
+  {
+    const auto lhs_buf = lhs_data->as_f32_bufptr();
+    const auto rhs_buf = rhs_data->as_f32_bufptr();
+
+    auto mat_mul_buf = calc_mat_mul<float>(lhs_buf, rhs_buf);
+
+    mat_mul_result = make_data(mat_mul_buf);
+  }
+  else if (lhs_data->dtype() == loco::DataType::S32 && rhs_data->dtype() == loco::DataType::S32)
+  {
+    const auto lhs_buf = lhs_data->as_s32_bufptr();
+    const auto rhs_buf = rhs_data->as_s32_bufptr();
+
+    auto mat_mul_buf = calc_mat_mul<int32_t>(lhs_buf, rhs_buf);
+
+    mat_mul_result = make_data(mat_mul_buf);
+  }
+  else
+  {
+    // Mixed or unsupported operand types
+    throw std::runtime_error("NYI for these DataTypes");
+  }
+
+  assert(mat_mul_result != nullptr);
+
+  annot_data(mat_mul, std::move(mat_mul_result));
+  annot_domain(mat_mul, loco::Domain::Matrix);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatMul.test.cpp b/compiler/locomotiv/src/Node/MatMul.test.cpp
new file mode 100644
index 000000000..bd480f7c7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatMul.test.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Run MatMul over the given operands and compare against the expected output
+ *
+ * lhs, rhs and expected_output are read through LexicalLayout overlays of the
+ * given shapes. Both operands are placed on MatrixEncode nodes that are annotated
+ * directly with data and the Matrix domain.
+ */
+template <typename T>
+void run_test(const T *lhs, const T *rhs, const T *expected_output, const Shape &lhs_shape,
+              const Shape &rhs_shape, const Shape &out_shape, loco::DataType expected_datatype)
+{
+  auto graph = loco::make_graph();
+
+  // Creates a MatrixEncode node annotated with a copy of 'values' and the Matrix domain
+  auto make_operand = [&graph](const T *values, const Shape &shape) {
+    auto node = graph->nodes()->create<loco::MatrixEncode>();
+
+    auto buf = make_buffer<T, LexicalLayout>(shape);
+    auto src = make_overlay<T, LexicalLayout>(shape, const_cast<T *>(values));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{shape}; e.valid(); e.advance())
+    {
+      const auto &idx = e.current();
+      buf.at(idx) = src.at(idx);
+    }
+
+    locomotiv::annot_data(node, locomotiv::make_data(buf));
+    locomotiv::annot_domain(node, loco::Domain::Matrix);
+
+    return node;
+  };
+
+  auto lhs_node = make_operand(lhs, lhs_shape);
+  auto rhs_node = make_operand(rhs, rhs_shape);
+
+  // Wire the operands into a MatMul node and execute it
+  auto mat_mul = graph->nodes()->create<loco::MatMul>();
+  mat_mul->lhs(lhs_node);
+  mat_mul->rhs(rhs_node);
+
+  locomotiv::NodeExecution::get().run(mat_mul);
+
+  // Verify type and shape of the annotated result
+  auto result = locomotiv::annot_data(mat_mul);
+
+  ASSERT_NE(result, nullptr);
+  ASSERT_TRUE(result->dtype() == expected_datatype);
+  ASSERT_TRUE(*(result->shape()) == out_shape);
+
+  // Verify every element against the expected output
+  auto expected = make_overlay<T, LexicalLayout>(out_shape, const_cast<T *>(expected_output));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{out_shape}; e.valid(); e.advance())
+  {
+    const auto &idx = e.current();
+    if (expected_datatype == loco::DataType::FLOAT32)
+    {
+      ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(idx), expected.at(idx));
+    }
+    else if (expected_datatype == loco::DataType::S32)
+    {
+      ASSERT_EQ(result->as_s32_bufptr()->at(idx), expected.at(idx));
+    }
+    else
+    {
+      throw std::runtime_error("NYI for these DataTypes");
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(mat_mul), loco::Domain::Matrix);
+}
+
+} // namespace
+
+// clang-format off
+/* from the code below:
+
+import numpy as np
+
+a = [[-0.48850584, 1.4292705, -1.3424522],
+ [1.7021934, -0.39246717, 0.6248314]]
+
+b = [[-0.0830195, 0.21088193, -0.11781317],
+ [0.07755677, 1.6337638, 1.0792778],
+ [-1.6922939, -1.5437212, 0.96667504]]
+
+print(np.array(a) @ np.array(b))
+*/
+// FLOAT32 case: (2x3) x (3x3) -> (2x3)
+TEST(NodeExecution_MatMul, f32_2x3_3x3)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const Shape lhs_shape{2, 3};
+  const Shape rhs_shape{3, 3};
+  const Shape out_shape{2, 3};
+
+  const float lhs_values[] =
+  {
+    -0.48850584,  1.4292705,  -1.3424522,
+     1.7021934,  -0.39246717,  0.6248314
+  };
+
+  const float rhs_values[] =
+  {
+    -0.0830195,   0.21088193, -0.11781317,
+     0.07755677,  1.6337638,   1.0792778,
+    -1.6922939,  -1.5437212,   0.96667504
+  };
+
+  const float expected[] =
+  {
+     2.42322878,  4.30444527,  0.30241731,
+    -1.2291521,  -1.2468023,  -0.02011299
+  };
+
+  run_test<float>(lhs_values, rhs_values, expected, lhs_shape, rhs_shape, out_shape,
+                  loco::DataType::FLOAT32);
+}
+
+/* from the code below:
+
+import numpy as np
+
+a = np.random.randint(10000, size=(4, 2))
+
+b = np.random.randint(10000, size=(2, 6))
+
+print(a)
+print(b)
+print(np.array(a) @ np.array(b))
+*/
+// S32 case: (4x2) x (2x6) -> (4x6)
+TEST(NodeExecution_MatMul, s32_4x2_2x6)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const Shape lhs_shape{4, 2};
+  const Shape rhs_shape{2, 6};
+  const Shape out_shape{4, 6};
+
+  const int32_t lhs_values[] =
+  {
+    6392, 4993,
+      54, 9037,
+    3947, 5820,
+    5800, 4181
+  };
+
+  const int32_t rhs_values[] =
+  {
+    2694, 8376, 8090, 1285, 7492, 1652,
+    5427, 8798, 7634, 2229, 5439, 6999
+  };
+
+  const int32_t expected[] =
+  {
+    44317059, 97467806, 89827842, 19343117, 75045791, 45505591,
+    49189275, 79959830, 69425318, 20212863, 49556811, 63339171,
+    42218358, 84264432, 76361110, 18044675, 61225904, 47254624,
+    38315487, 85365238, 78839754, 16772449, 66194059, 38844419
+  };
+
+  run_test<int32_t>(lhs_values, rhs_values, expected, lhs_shape, rhs_shape, out_shape,
+                    loco::DataType::S32);
+}
+
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/MatrixCodec.test.cpp b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
new file mode 100644
index 000000000..8fc5d593b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <loco/IR/PermutingCodec.h>
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Buffer;
+
+// This file is intended to test MatrixEncode and MatrixDecode at once
+namespace
+{
+
+/// @brief Fixture owning a graph, with helpers that build Pull/MatrixEncode/MatrixDecode nodes
+class NodeExecution_MatrixCodec : public ::testing::Test
+{
+private:
+  loco::Graph _graph;
+
+protected:
+  /// @brief Make Pull node and set data by given buffer and data type
+  template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype)
+  {
+    auto node = _graph.nodes()->create<loco::Pull>();
+    node->dtype(dtype);
+
+    // Annotate the node as if it had already been executed
+    locomotiv::annot_data(node, locomotiv::make_data(pull_buf));
+    locomotiv::annot_domain(node, loco::Domain::Tensor);
+
+    return node;
+  }
+
+  /// @brief Make MatrixEncode node with given input and encoding permutation
+  loco::MatrixEncode *matrix_encode_layer(loco::Node *input,
+                                          const loco::Permutation<loco::Domain::Matrix> &perm)
+  {
+    using Encoder = loco::PermutingEncoder<loco::Domain::Matrix>;
+
+    std::unique_ptr<Encoder> codec{new Encoder};
+    codec->perm(perm);
+
+    auto node = _graph.nodes()->create<loco::MatrixEncode>();
+    node->input(input);
+    node->encoder(std::move(codec));
+
+    return node;
+  }
+
+  /// @brief Make MatrixDecode node with given input and decoding permutation
+  loco::MatrixDecode *matrix_decode_layer(loco::Node *input,
+                                          const loco::Permutation<loco::Domain::Matrix> &perm)
+  {
+    using Decoder = loco::PermutingDecoder<loco::Domain::Matrix>;
+
+    std::unique_ptr<Decoder> codec{new Decoder};
+    codec->perm(perm);
+
+    auto node = _graph.nodes()->create<loco::MatrixDecode>();
+    node->input(input);
+    node->decoder(std::move(codec));
+
+    return node;
+  }
+};
+
+} // namespace
+
+// Identity (HW) permutation on S32 data: encode and decode must both keep the layout
+TEST_F(NodeExecution_MatrixCodec, HW_s32)
+{
+  const uint32_t H = 3;
+  const uint32_t W = 4;
+  const Shape hw_shape{H, W};
+
+  // Fill the HW source buffer with consecutive integers (the values do not matter)
+  auto src_buf = make_buffer<int32_t, LexicalLayout>(hw_shape);
+  int32_t next_value = 0;
+  for (IndexEnumerator e{src_buf.shape()}; e.valid(); e.advance())
+  {
+    src_buf.at(e.current()) = next_value++;
+  }
+
+  // Permutation shared by encoder and decoder: axis 0 is Height, axis 1 is Width
+  loco::Permutation<loco::Domain::Matrix> perm;
+  perm.axis(loco::MatrixAxis::Height) = 0;
+  perm.axis(loco::MatrixAxis::Width) = 1;
+
+  auto pull_node = pull_layer(src_buf, loco::DataType::S32);
+
+  // --- MatrixEncode ---
+  auto enc_node = matrix_encode_layer(pull_node, perm);
+  locomotiv::NodeExecution::get().run(enc_node);
+
+  auto encoded = locomotiv::annot_data(enc_node);
+  ASSERT_NE(encoded, nullptr);
+  ASSERT_EQ(encoded->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(encoded->shape()), hw_shape); // locomotiv matrix is HW
+  auto encoded_buf = encoded->as_s32_bufptr();
+  for (uint32_t row = 0; row < H; ++row)
+  {
+    for (uint32_t col = 0; col < W; ++col)
+    {
+      ASSERT_EQ(src_buf.at(Index{row, col}), encoded_buf->at(Index{row, col}));
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc_node), loco::Domain::Matrix);
+
+  // --- MatrixDecode: Encode -> Decode == identity ---
+  auto dec_node = matrix_decode_layer(enc_node, perm);
+  locomotiv::NodeExecution::get().run(dec_node);
+
+  auto decoded = locomotiv::annot_data(dec_node);
+  ASSERT_NE(decoded, nullptr);
+  ASSERT_EQ(decoded->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(decoded->shape()), hw_shape);
+  auto decoded_buf = decoded->as_s32_bufptr();
+  for (uint32_t row = 0; row < H; ++row)
+  {
+    for (uint32_t col = 0; col < W; ++col)
+    {
+      ASSERT_EQ(src_buf.at(Index{row, col}), decoded_buf->at(Index{row, col}));
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dec_node), loco::Domain::Tensor);
+}
+
+// Transposing (WH) permutation on FLOAT32 data: encode yields HW, decode restores WH
+TEST_F(NodeExecution_MatrixCodec, WH_f32)
+{
+  const uint32_t W = 6;
+  const uint32_t H = 5;
+  const Shape wh_shape{W, H};
+  const Shape hw_shape{H, W};
+
+  // Fill the WH source buffer with increasing values (the values do not matter)
+  auto src_buf = make_buffer<float, LexicalLayout>(wh_shape);
+  float next_value = 0.0f;
+  for (IndexEnumerator e{src_buf.shape()}; e.valid(); e.advance())
+  {
+    src_buf.at(e.current()) = next_value;
+    next_value += 0.1f;
+  }
+
+  // Permutation shared by encoder and decoder: axis 0 is Width, axis 1 is Height
+  loco::Permutation<loco::Domain::Matrix> perm;
+  perm.axis(loco::MatrixAxis::Width) = 0;
+  perm.axis(loco::MatrixAxis::Height) = 1;
+
+  auto pull_node = pull_layer(src_buf, loco::DataType::FLOAT32);
+
+  // --- MatrixEncode ---
+  auto enc_node = matrix_encode_layer(pull_node, perm);
+  locomotiv::NodeExecution::get().run(enc_node);
+
+  auto encoded = locomotiv::annot_data(enc_node);
+  ASSERT_NE(encoded, nullptr);
+  ASSERT_EQ(encoded->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(encoded->shape()), hw_shape); // locomotiv matrix is HW
+  auto encoded_buf = encoded->as_f32_bufptr();
+  for (uint32_t row = 0; row < H; ++row)
+  {
+    for (uint32_t col = 0; col < W; ++col)
+    {
+      // Source is WH, encoded data is HW
+      ASSERT_FLOAT_EQ(src_buf.at(Index{col, row}), encoded_buf->at(Index{row, col}));
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(enc_node), loco::Domain::Matrix);
+
+  // --- MatrixDecode: Encode -> Decode == identity ---
+  auto dec_node = matrix_decode_layer(enc_node, perm);
+  locomotiv::NodeExecution::get().run(dec_node);
+
+  auto decoded = locomotiv::annot_data(dec_node);
+  ASSERT_NE(decoded, nullptr);
+  ASSERT_EQ(decoded->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(decoded->shape()), wh_shape);
+  auto decoded_buf = decoded->as_f32_bufptr();
+  for (uint32_t row = 0; row < H; ++row)
+  {
+    for (uint32_t col = 0; col < W; ++col)
+    {
+      ASSERT_FLOAT_EQ(src_buf.at(Index{col, row}), decoded_buf->at(Index{col, row}));
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dec_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp
new file mode 100644
index 000000000..c591676ae
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::Index;
+
+/**
+ * @brief Build the output data of a MatrixDecode node from a rank-2 (HW) input buffer
+ *
+ * The output shape and the source element of every output element are obtained
+ * from the decoder attached to the node.
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *node,
+                                                   const Buffer<T> *input_buf)
+{
+  auto decoder = node->decoder();
+
+  // Describe the input as a MatrixShape. Note that matrix in locomotiv represented as HW
+  const auto &src_shape = input_buf->shape();
+  assert(src_shape.rank() == 2);
+
+  loco::MatrixShape matrix_shape;
+  matrix_shape.height() = src_shape.dim(0);
+  matrix_shape.width() = src_shape.dim(1);
+
+  // Ask the decoder for the shape of the decoded tensor and allocate it
+  loco::TensorShape tensor_shape = decoder->shape(matrix_shape);
+
+  const Shape out_shape{tensor_shape.dim(0).value(), tensor_shape.dim(1).value()};
+  Buffer<T> out_buf = make_buffer<T, LexicalLayout>(out_shape);
+
+  // For every output element, let the decoder tell which matrix element to read
+  for (IndexEnumerator e{out_buf.shape()}; e.valid(); e.advance())
+  {
+    const auto &dst = e.current();
+    loco::MatrixIndex src = decoder->value(dst);
+
+    out_buf.at(dst) = input_buf->at(Index({src.row(), src.column()}));
+  }
+
+  return locomotiv::make_data(out_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a MatrixDecode node
+ *
+ * Requires the input to carry rank-2 data annotated with the Matrix domain, and
+ * the node to have a decoder. S32 and FLOAT32 are handled; any other data type
+ * raises std::runtime_error. The decoded data is annotated on the node with the
+ * Tensor domain.
+ */
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+{
+  auto input_data = annot_data(matrix_dec->input());
+
+  validate(input_data, "Input not ready");
+  validate(annot_domain(matrix_dec->input()) == loco::Domain::Matrix,
+           "Input domain should be Matrix");
+  validate(input_data->shape()->rank() == 2, "Input data rank must be 2");
+  // matrix_decode dereferences the decoder unconditionally, so reject a missing one here
+  validate(matrix_dec->decoder() != nullptr, "Decoder is not set");
+
+  std::unique_ptr<NodeData> matrix_dec_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      matrix_dec_data = matrix_decode<int32_t>(matrix_dec, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      matrix_dec_data = matrix_decode<float>(matrix_dec, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(matrix_dec_data != nullptr);
+
+  annot_data(matrix_dec, std::move(matrix_dec_data));
+  annot_domain(matrix_dec, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatrixEncode.cpp b/compiler/locomotiv/src/Node/MatrixEncode.cpp
new file mode 100644
index 000000000..e3554e15a
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MatrixEncode.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+
+#include <stdexcept>
+#include <cassert>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::IndexEnumerator;
+
+/**
+ * @brief Build the output data of a MatrixEncode node from a rank-2 input buffer
+ *
+ * The output (HW) shape and the source element of every output element are
+ * obtained from the encoder attached to the node.
+ */
+template <typename T>
+std::unique_ptr<locomotiv::NodeData> matrix_encode(const loco::MatrixEncode *node,
+                                                   const Buffer<T> *input_buf)
+{
+  auto encoder = node->encoder();
+
+  // Describe the input as a TensorShape
+  const auto &src_shape = input_buf->shape();
+
+  loco::TensorShape tensor_shape;
+  tensor_shape.rank(src_shape.rank());
+  assert(tensor_shape.rank() == 2);
+  for (uint32_t axis = 0; axis < tensor_shape.rank(); ++axis)
+  {
+    tensor_shape.dim(axis) = src_shape.dim(axis);
+  }
+
+  // Ask the encoder for the shape of the encoded matrix and allocate it as HW
+  loco::MatrixShape matrix_shape = encoder->shape(tensor_shape);
+
+  const Shape out_shape{matrix_shape.height().value(), matrix_shape.width().value()};
+  Buffer<T> out_buf = make_buffer<T, LexicalLayout>(out_shape);
+
+  // For every (row, column) of the output, let the encoder tell which input element to read
+  for (IndexEnumerator e{out_buf.shape()}; e.valid(); e.advance())
+  {
+    const auto &dst = e.current();
+
+    loco::MatrixIndex matrix_index;
+    matrix_index.row() = dst.at(0);
+    matrix_index.column() = dst.at(1);
+
+    out_buf.at(dst) = input_buf->at(encoder->value(matrix_index));
+  }
+
+  return locomotiv::make_data(out_buf);
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a MatrixEncode node
+ *
+ * Requires the input to carry rank-2 data annotated with the Tensor domain, and
+ * the node to have an encoder. S32 and FLOAT32 are handled; any other data type
+ * raises std::runtime_error. The encoded data is annotated on the node with the
+ * Matrix domain.
+ */
+void NodeExecution::execute(loco::MatrixEncode *matrix_enc)
+{
+  auto input_data = annot_data(matrix_enc->input());
+
+  validate(input_data, "Input not ready");
+  validate(annot_domain(matrix_enc->input()) == loco::Domain::Tensor,
+           "Input domain should be Tensor");
+  validate(input_data->shape()->rank() == 2, "Input data rank must be 2");
+  // matrix_encode dereferences the encoder unconditionally, so reject a missing one here
+  validate(matrix_enc->encoder() != nullptr, "Encoder is not set");
+
+  std::unique_ptr<NodeData> matrix_enc_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::S32:
+    {
+      auto input_buf = input_data->as_s32_bufptr();
+      matrix_enc_data = matrix_encode<int32_t>(matrix_enc, input_buf);
+      break;
+    }
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      matrix_enc_data = matrix_encode<float>(matrix_enc, input_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(matrix_enc_data != nullptr);
+
+  annot_data(matrix_enc, std::move(matrix_enc_data));
+  annot_domain(matrix_enc, loco::Domain::Matrix);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp
new file mode 100644
index 000000000..5d92f89f5
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <limits>
+#include <cassert>
+#include <algorithm>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * Returns (image_size + whole_pad - filter_size) / stride + 1.
+ *
+ * @param image_size  Size of the input along this axis
+ * @param whole_pad   Sum of front and back pad
+ * @param filter_size Size of the window along this axis
+ * @param stride      Step between consecutive windows; must not be zero
+ *
+ * @throws std::runtime_error if stride is zero or if the window is larger than
+ *         the padded input. Without these checks the former divides by zero and
+ *         the latter wraps around in unsigned arithmetic.
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+                                 uint32_t stride)
+{
+  if (stride == 0)
+  {
+    throw std::runtime_error("stride must not be zero");
+  }
+
+  const uint32_t padded_size = image_size + whole_pad;
+  if (filter_size > padded_size)
+  {
+    throw std::runtime_error("filter must not be larger than padded input");
+  }
+
+  const uint32_t span = padded_size - filter_size;
+  // The window is expected to tile the padded input exactly
+  assert(span % stride == 0);
+  return span / stride + 1;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Compute 2D max pooling over a rank-4 input buffer
+ *
+ * The input is indexed as (batch, y, x, channel). Window, stride and pad are read
+ * from the given MaxPool2D node. Returns a newly made buffer of shape
+ * {batches, output_height, output_width, depth}.
+ */
+template <typename T>
+nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
+ const Buffer<T> *ifm_buf)
+{
+ auto ifm_shape = ifm_buf->shape();
+
+ const uint32_t batches = ifm_shape.dim(0);
+ const uint32_t depth = ifm_shape.dim(3);
+
+ const uint32_t ifm_height = ifm_shape.dim(1);
+ const uint32_t ifm_width = ifm_shape.dim(2);
+
+ const uint32_t window_height = maxpool2d->window()->vertical();
+ const uint32_t window_width = maxpool2d->window()->horizontal();
+
+ const uint32_t stride_height = maxpool2d->stride()->vertical();
+ const uint32_t stride_width = maxpool2d->stride()->horizontal();
+
+ const uint32_t pad_top = maxpool2d->pad()->top();
+ const uint32_t pad_bottom = maxpool2d->pad()->bottom();
+
+ const uint32_t pad_left = maxpool2d->pad()->left();
+ const uint32_t pad_right = maxpool2d->pad()->right();
+
+ // Output extent per axis; bottom/right pad only take part in this size computation
+ const uint32_t output_height =
+ compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
+ const uint32_t output_width =
+ compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);
+
+ // prepare output buffer
+ Shape output_shape{batches, output_height, output_width, depth};
+ auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
+
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (uint32_t channel = 0; channel < depth; ++channel)
+ {
+ // Input coordinate of the window's top-left corner. The subtraction is done in
+ // uint32_t and then converted to int, so the origin is negative when the window
+ // starts inside the top/left padding.
+ const int in_x_origin = (out_x * stride_width) - pad_left;
+ const int in_y_origin = (out_y * stride_height) - pad_top;
+
+ // Compute the boundaries of the filter region clamped so as to
+ // ensure that the filter window fits in the input array.
+ const uint32_t filter_x_start = std::max(0, -in_x_origin);
+ const uint32_t filter_x_end = std::min(window_width, ifm_width - in_x_origin);
+
+ const uint32_t filter_y_start = std::max(0, -in_y_origin);
+ const uint32_t filter_y_end = std::min(window_height, ifm_height - in_y_origin);
+
+ // Start from the lowest representable value; it is also the result when the
+ // clamped window is empty
+ T max = std::numeric_limits<T>::lowest();
+
+ for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
+ {
+ // Only positions inside the clamped window are visited, i.e. padded
+ // positions never contribute to the maximum
+ const uint32_t in_x = in_x_origin + filter_x;
+ const uint32_t in_y = in_y_origin + filter_y;
+ max = std::max(max, ifm_buf->at(Index({batch, in_y, in_x, channel})));
+ }
+ }
+
+ output_buf.at(Index({batch, out_y, out_x, channel})) = max;
+ }
+ }
+ }
+ }
+
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a MaxPool2D node
+ *
+ * Requires the input feature map to carry rank-4 data annotated with the Feature
+ * domain. Only FLOAT32 is handled; any other data type raises std::runtime_error.
+ * The pooled data is annotated on the node with the Feature domain.
+ */
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+{
+  auto input = annot_data(maxpool2d->ifm());
+
+  validate(input, "Can't find input data of MaxPool2D");
+  validate(input->shape()->rank() == 4, "IFM rank should be 4");
+  validate(annot_domain(maxpool2d->ifm()) == loco::Domain::Feature,
+           "ifm of MaxPool2D is not Feature");
+
+  // FLOAT32 is the only data type implemented so far
+  if (input->dtype() != loco::DataType::FLOAT32)
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  auto pooled_buf = maxPool2D<float>(maxpool2d, input->as_f32_bufptr());
+  std::unique_ptr<NodeData> result = make_data(pooled_buf);
+
+  assert(result != nullptr);
+
+  annot_data(maxpool2d, std::move(result));
+  annot_domain(maxpool2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.test.cpp b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
new file mode 100644
index 000000000..9d877a96b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+/**
+ * @brief Run MaxPool2D on 'ifm' and compare the result with 'expected_ofm'
+ *
+ * Builds a FeatureEncode -> MaxPool2D graph, annotates the FeatureEncode node
+ * with a copy of 'ifm' in the Feature domain, runs the MaxPool2D node through
+ * the interpreter, and then checks the dtype, shape, element values and domain
+ * of the annotated result.
+ */
+void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h,
+              const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top,
+              const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right)
+{
+  using nncc::core::ADT::tensor::IndexEnumerator;
+
+  auto g = loco::make_graph();
+
+  // Input side: a FeatureEncode node whose annotated data is a copy of ifm
+  auto enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+
+    IndexEnumerator e{ifm_shape};
+    while (e.valid())
+    {
+      const auto &ind = e.current();
+      enc_buf.at(ind) = ifm_overlay.at(ind);
+      e.advance();
+    }
+
+    locomotiv::annot_data(enc, locomotiv::make_data(enc_buf));
+    locomotiv::annot_domain(enc, loco::Domain::Feature);
+  }
+
+  // Node under test: MaxPool2D configured from the arguments
+  auto maxpool2d = g->nodes()->create<loco::MaxPool2D>();
+  maxpool2d->ifm(enc);
+
+  maxpool2d->pad()->top(pad_top);
+  maxpool2d->pad()->bottom(pad_bottom);
+  maxpool2d->pad()->left(pad_left);
+  maxpool2d->pad()->right(pad_right);
+
+  maxpool2d->stride()->vertical(stride_v);
+  maxpool2d->stride()->horizontal(stride_h);
+
+  maxpool2d->window()->vertical(window_v);
+  maxpool2d->window()->horizontal(window_h);
+
+  // Evaluate the node
+  locomotiv::NodeExecution::get().run(maxpool2d);
+
+  // Inspect what the interpreter annotated on the node
+  auto maxpool2d_data = locomotiv::annot_data(maxpool2d);
+
+  ASSERT_NE(maxpool2d_data, nullptr);
+  ASSERT_TRUE(maxpool2d_data->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+
+  IndexEnumerator e{ofm_shape};
+  while (e.valid())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(maxpool2d_data->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+    e.advance();
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(maxpool2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+
+ value = tf.random_normal([1, 3, 3, 1], stddev=1)
+ maxpool = tf.nn.max_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'VALID',
+ data_format="NHWC")
+ with tf.Session() as sess:
+ print(sess.run(maxpool))
+*/
+
+// 2x2 window, stride 1, no padding, applied to a 1x3x3x1 input
+TEST(NodeExecution_MaxPool2D, f32_1x3x3x1_calculation)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] =
+  {
+    -1.5510627,   0.3653609,   1.9002001,
+    -0.15861237, -0.32944828,  1.2053918,
+     0.50054574, -0.8533826,   0.131492,
+  };
+
+  // Maximum of every 2x2 window of ifm
+  const float ofm[] =
+  {
+    0.3653609,  1.9002001,
+    0.50054574, 1.2053918
+  };
+
+  const Shape ifm_shape{1, 3, 3, 1};
+  const Shape ofm_shape{1, 2, 2, 1};
+
+  run_test(ifm, ofm, ifm_shape, ofm_shape,
+           2, 2,      // kernel (vertical, horizontal)
+           1, 1,      // stride (vertical, horizontal)
+           0, 0, 0, 0 // padding (top, bottom, left, right)
+  );
+}
+
+// 3x3 window, stride 2, one element of padding on every side of a 1x5x5x1 input
+TEST(NodeExecution_MaxPool2D, with_padding)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] =
+  {
+     1,  2,  3,  4,  5,
+     6,  7,  8,  9, 10,
+    11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20,
+    21, 22, 23, 24, 25
+  };
+
+  // Maximum of every (clipped) 3x3 window of ifm
+  const float ofm[] =
+  {
+     7,  9, 10,
+    17, 19, 20,
+    22, 24, 25
+  };
+
+  const Shape ifm_shape{1, 5, 5, 1};
+  const Shape ofm_shape{1, 3, 3, 1};
+
+  run_test(ifm, ofm, ifm_shape, ofm_shape,
+           3, 3,      // kernel (vertical, horizontal)
+           2, 2,      // stride (vertical, horizontal)
+           1, 1, 1, 1 // padding (top, bottom, left, right) - this mimics SAME padding
+  );
+}
+// clang-format on
diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp
new file mode 100644
index 000000000..c482d8b04
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Pull.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "UserData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a Pull node using the data the user attached to it
+ *
+ * The data is looked up with user_data(pull) and must be present. S32 and
+ * FLOAT32 buffers are handled; any other data type raises std::runtime_error.
+ * The resulting data is annotated on the node together with the Tensor domain.
+ */
+void NodeExecution::execute(loco::Pull *pull)
+{
+// TODO Remove deprecated code
+#if 0
+  validate(annot_data(pull), "Data for Pull is not ready");
+
+  validate(annot_domain(pull) == loco::Domain::Tensor, "Domain for Pull is not Tensor");
+
+  // DO NOTHING
+#endif
+
+  auto input_data = user_data(pull);
+
+  // User always passes a "Tensor"
+  validate(input_data, "Input not ready");
+
+  std::unique_ptr<NodeData> pull_data = nullptr;
+
+  // Q. Is it possible to use generic one?
+  const auto dtype = input_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    pull_data = make_data(*input_data->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    pull_data = make_data(*input_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(pull_data != nullptr);
+
+  annot_data(pull, std::move(pull_data));
+  annot_domain(pull, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Pull.test.cpp b/compiler/locomotiv/src/Node/Pull.test.cpp
new file mode 100644
index 000000000..53e78776b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Pull.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "UserData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// Pull must fail while no user data is attached and succeed once it is
+TEST(NodeExecution_Pull, check_data_ready)
+{
+  // Graph consisting of a single Pull node
+  auto g = loco::make_graph();
+  auto pull = g->nodes()->create<loco::Pull>();
+
+  // Nothing has been attached to the node yet, so evaluation has to throw
+  ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull));
+
+  // Attach a one-element float buffer as user data
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  locomotiv::user_data(pull, locomotiv::make_data(pull_buf));
+
+// The behavior of Pull is now consistent with that of other nodes.
+// - annot_data and annot_domain is available after evaluating that "pull" node.
+// TODO Remove this
+#if 0
+  // Domain not ready yet
+  ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull));
+
+  // Set Domain
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+#endif
+
+  // With user data in place the node evaluates without throwing
+  ASSERT_NO_THROW(locomotiv::NodeExecution::get().run(pull));
+}
diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp
new file mode 100644
index 000000000..fc5808b15
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Push.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a Push node by forwarding the data annotated on its source
+ *
+ * The source node (push->from()) must already carry annotated data in the
+ * Tensor domain. S32 and FLOAT32 buffers are handled; any other data type
+ * raises std::runtime_error. The result is annotated on the Push node together
+ * with the Tensor domain.
+ */
+void NodeExecution::execute(loco::Push *push)
+{
+  auto from_node = push->from();
+  auto from_data = annot_data(from_node);
+
+  validate(from_data, "Ingredient not ready");
+  validate(annot_domain(from_node) == loco::Domain::Tensor, "Ingredient of Push is not tensor");
+
+  std::unique_ptr<NodeData> push_data = nullptr;
+
+  const auto dtype = from_data->dtype();
+  if (dtype == loco::DataType::S32)
+  {
+    push_data = make_data(*from_data->as_s32_bufptr());
+  }
+  else if (dtype == loco::DataType::FLOAT32)
+  {
+    push_data = make_data(*from_data->as_f32_bufptr());
+  }
+  else
+  {
+    throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(push_data != nullptr);
+
+  annot_data(push, std::move(push_data));
+  annot_domain(push, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Push.test.cpp b/compiler/locomotiv/src/Node/Push.test.cpp
new file mode 100644
index 000000000..be8f1e4e9
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Push.test.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// Push of a single S32 element must reproduce the value annotated on Pull
+TEST(NodeExecution_Push, s32)
+{
+  auto g = loco::make_graph();
+
+  // Graph: Pull (S32, shape {1}) -> Push
+  auto pull = g->nodes()->create<loco::Pull>();
+  auto push = g->nodes()->create<loco::Push>();
+  pull->dtype(loco::DataType::S32);
+  pull->shape({1});
+  push->from(pull);
+
+  // Annotate the Pull node directly with one S32 value in the Tensor domain
+  auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{1});
+  pull_buf.at(Index{0}) = 42;
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(push);
+
+  // Dtype, shape and value have to survive the Push
+  auto push_data = locomotiv::annot_data(push);
+  ASSERT_NE(push_data, nullptr);
+  ASSERT_EQ(push_data->dtype(), loco::DataType::S32);
+  ASSERT_EQ(*(push_data->shape()), Shape{1});
+  ASSERT_EQ(push_data->as_s32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor);
+}
+
+// Push of a single FLOAT32 element must reproduce the value annotated on Pull
+TEST(NodeExecution_Push, f32)
+{
+  auto g = loco::make_graph();
+
+  // Graph: Pull (FLOAT32, shape {1}) -> Push
+  auto pull = g->nodes()->create<loco::Pull>();
+  auto push = g->nodes()->create<loco::Push>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({1});
+  push->from(pull);
+
+  // Annotate the Pull node directly with one float value in the Tensor domain
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  pull_buf.at(Index{0}) = 3.14f;
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(push);
+
+  // Dtype, shape and value have to survive the Push
+  auto push_data = locomotiv::annot_data(push);
+  ASSERT_NE(push_data, nullptr);
+  ASSERT_EQ(push_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(push_data->shape()), Shape{1});
+  ASSERT_FLOAT_EQ(push_data->as_f32_bufptr()->at(Index{0}), pull_buf.at(Index{0}));
+
+  ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/ReLU.cpp b/compiler/locomotiv/src/Node/ReLU.cpp
new file mode 100644
index 000000000..c0f8620e7
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+namespace
+{
+
+// Element-wise ReLU: values greater than zero pass through, everything else maps to 0.0f
+inline float relu_ew(float val)
+{
+  if (val > 0.0f)
+  {
+    return val;
+  }
+  return 0.0f;
+}
+
+// Adapter that exposes relu_ew through the locomotiv::UnaryFunc interface
+struct Func final : public locomotiv::UnaryFunc
+{
+  float apply(float v) const override
+  {
+    return relu_ew(v);
+  }
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a ReLU node by handing the ReLU functor to eltwise_unary
+ */
+void NodeExecution::execute(loco::ReLU *relu)
+{
+  Func relu_func;
+
+  eltwise_unary(relu, relu_func);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ReLU.test.cpp b/compiler/locomotiv/src/Node/ReLU.test.cpp
new file mode 100644
index 000000000..0ddd01d0f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// ReLU over {-10, 10} is expected to give {0, 10}
+TEST(NodeExecution_ReLU, f32)
+{
+  auto g = loco::make_graph();
+
+  // Graph: Pull (FLOAT32, shape {2}) -> ReLU
+  auto pull = g->nodes()->create<loco::Pull>();
+  auto relu = g->nodes()->create<loco::ReLU>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({2});
+  relu->input(pull);
+
+  // Annotate the Pull node with one negative and one positive value
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2});
+  pull_buf.at(Index{0}) = -10.0f;
+  pull_buf.at(Index{1}) = 10.0f;
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(relu);
+
+  // The negative input is clamped to zero, the positive one is untouched
+  auto relu_data = locomotiv::annot_data(relu);
+  ASSERT_NE(relu_data, nullptr);
+  ASSERT_EQ(relu_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(relu_data->shape()), Shape{2});
+  ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{0}), 0.0f);
+  ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{1}), 10.0f);
+
+  ASSERT_EQ(locomotiv::annot_domain(relu), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/ReLU6.cpp b/compiler/locomotiv/src/Node/ReLU6.cpp
new file mode 100644
index 000000000..586c015fc
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU6.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+// TODO Remove deprecated code
+#if 0
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+#include <cassert>
+#include <stdexcept>
+#endif
+
+namespace
+{
+
+// Element-wise ReLU6: values below 0 map to 0.0f, values below 6 pass through,
+// everything else maps to 6.0f
+inline float relu6_ew(float val)
+{
+  if (val < 0.0f)
+  {
+    return 0.0f;
+  }
+  if (val < 6.0f)
+  {
+    return val;
+  }
+  return 6.0f;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a ReLU6 node by handing a ReLU6 functor to eltwise_unary
+ *
+ * The block guarded by '#if 0' is the previous hand-written implementation; it
+ * is excluded from compilation and kept only until the TODO is resolved.
+ */
+void NodeExecution::execute(loco::ReLU6 *relu6)
+{
+// TODO Remove deprecated code
+#if 0
+  auto input_data = annot_data(relu6->input());
+
+  validate(input_data, "Input not ready");
+  validate(annot_domain(relu6->input()) != loco::Domain::Unknown,
+           "Input domain of ReLU is Unknown");
+
+  std::unique_ptr<NodeData> relu6_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto *shape = input_data->shape();
+      auto relu6_buf = make_buffer<float, LexicalLayout>(*shape);
+
+      for (IndexEnumerator e{*shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        relu6_buf.at(index) = relu6_ew(input_bufptr->at(index));
+      }
+
+      relu6_data = make_data(relu6_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(relu6_data != nullptr);
+  annot_data(relu6, std::move(relu6_data));
+  annot_domain(relu6, annot_domain(relu6->input()));
+#endif
+
+  // Adapter that exposes relu6_ew through the UnaryFunc interface
+  struct ReLU6Func final : public UnaryFunc
+  {
+    float apply(float v) const override
+    {
+      return relu6_ew(v);
+    }
+  };
+
+  ReLU6Func relu6_func;
+
+  eltwise_unary(relu6, relu6_func);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ReLU6.test.cpp b/compiler/locomotiv/src/Node/ReLU6.test.cpp
new file mode 100644
index 000000000..07f6af23f
--- /dev/null
+++ b/compiler/locomotiv/src/Node/ReLU6.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// ReLU6 over {{-5, 6}, {7, -8}} is expected to give {{0, 6}, {6, 0}}
+TEST(NodeExecution_ReLU6, f32)
+{
+  auto g = loco::make_graph();
+
+  // Graph: Pull (FLOAT32, shape {2, 2}) -> ReLU6
+  auto pull = g->nodes()->create<loco::Pull>();
+  auto relu6 = g->nodes()->create<loco::ReLU6>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({2, 2});
+  relu6->input(pull);
+
+  // Annotate the Pull node with values below 0, at 6 and above 6
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2, 2});
+  pull_buf.at(Index{0, 0}) = -5.0f;
+  pull_buf.at(Index{0, 1}) = 6.0f;
+  pull_buf.at(Index{1, 0}) = 7.0f;
+  pull_buf.at(Index{1, 1}) = -8.0f;
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(relu6);
+
+  // Negative inputs become 0, inputs above 6 are capped at 6
+  auto relu6_data = locomotiv::annot_data(relu6);
+  ASSERT_NE(relu6_data, nullptr);
+  ASSERT_EQ(relu6_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(relu6_data->shape()), Shape({2, 2}));
+  ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f);
+  ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 1}), 6.0f);
+  ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 0}), 6.0f);
+  ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 1}), 0.0f);
+
+  ASSERT_EQ(locomotiv::annot_domain(relu6), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp
new file mode 100644
index 000000000..ac1672024
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::num_elements;
+
+#include <cassert>
+#include <stdexcept>
+#include <cstring>
+#include <vector>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a fixed Reshape node
+ *
+ * The input node must already carry annotated data in the Tensor domain. The
+ * output shape is read from the node (rank() / dim(axis).value()) and the input
+ * elements are copied into a new buffer of that shape with memcpy. The node is
+ * annotated with the new data and with the domain of its input.
+ *
+ * Only FLOAT32 input is implemented; any other data type raises
+ * std::runtime_error. A mismatch between the element counts of the input and
+ * the requested output shape is rejected through validate().
+ */
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+  auto input_data = annot_data(reshape->input());
+
+  validate(input_data, "Input not ready");
+  validate(annot_domain(reshape->input()) == loco::Domain::Tensor,
+           "Input domain of Reshape is not Tensor");
+
+  std::unique_ptr<NodeData> reshape_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto *input_shape = input_data->shape();
+
+      using Shape = nncc::core::ADT::tensor::Shape;
+      Shape output_shape;
+
+      output_shape.resize(reshape->rank());
+      for (uint32_t axis = 0; axis < output_shape.rank(); ++axis)
+      {
+        output_shape.dim(axis) = reshape->dim(axis).value();
+      }
+
+      // This guards the memcpy below, so it must hold in every build mode:
+      // assert() is compiled out under NDEBUG, and a mismatch would then
+      // read or write past the end of one of the buffers.
+      validate(num_elements(*input_shape) == num_elements(output_shape),
+               "Number of elements of Reshape input and output does not match");
+
+      auto reshape_buf = make_buffer<float, LexicalLayout>(output_shape);
+
+      const float *input_ptr = input_bufptr->base();
+      const uint64_t input_len = num_elements(*input_shape) * sizeof(float);
+
+      float *output_ptr = reshape_buf.base();
+
+      memcpy(output_ptr, input_ptr, input_len);
+
+      reshape_data = make_data(reshape_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(reshape_data != nullptr);
+  annot_data(reshape, std::move(reshape_data));
+  annot_domain(reshape, annot_domain(reshape->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Reshape.test.cpp b/compiler/locomotiv/src/Node/Reshape.test.cpp
new file mode 100644
index 000000000..8e54a16df
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Reshape.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+// Reshaping {4} into {2, 2} must keep the elements in the same linear order
+TEST(NodeExecution_Reshape, f32)
+{
+  auto g = loco::make_graph();
+
+  // Graph: Pull (FLOAT32, shape {4}) -> Reshape to {2, 2}
+  auto pull = g->nodes()->create<loco::Pull>();
+  auto reshape = g->nodes()->create<loco::Reshape<loco::ReshapeType::Fixed>>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({4});
+  reshape->input(pull);
+  reshape->shape({2, 2});
+
+  // Annotate the Pull node with four distinct values
+  auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4});
+  pull_buf.at(Index{0}) = 0.0f;
+  pull_buf.at(Index{1}) = 1.1f;
+  pull_buf.at(Index{2}) = 2.2f;
+  pull_buf.at(Index{3}) = 3.3f;
+  locomotiv::annot_data(pull, locomotiv::make_data(pull_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(reshape);
+
+  // Element k of the input is expected at {k / 2, k % 2} of the output
+  auto reshape_data = locomotiv::annot_data(reshape);
+  ASSERT_NE(reshape_data, nullptr);
+  ASSERT_EQ(reshape_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(reshape_data->shape()), (Shape{2, 2}));
+  ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f);
+  ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 1}), 1.1f);
+  ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 0}), 2.2f);
+  ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 1}), 3.3f);
+
+  ASSERT_EQ(locomotiv::annot_domain(reshape), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp
new file mode 100644
index 000000000..352598b27
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Softmax.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+#include <cmath>
+
+namespace
+{
+
+// Return a copy of 'index' whose coordinate along 'axis' is reset to 0
+Index reduce_index(const Index &index, uint32_t axis)
+{
+  const uint32_t rank = index.rank();
+
+  Index reduced;
+  reduced.resize(rank);
+
+  uint32_t d = 0;
+  while (d < rank)
+  {
+    reduced.at(d) = index.at(d);
+    ++d;
+  }
+
+  // Collapse the reduced axis
+  reduced.at(axis) = 0;
+
+  return reduced;
+}
+
+// Return a copy of 'shape' whose dimension along 'axis' is set to 1
+Shape reduce_shape(const Shape &shape, uint32_t axis)
+{
+  const uint32_t rank = shape.rank();
+
+  Shape reduced;
+  reduced.resize(rank);
+
+  uint32_t d = 0;
+  while (d < rank)
+  {
+    reduced.dim(d) = shape.dim(d);
+    ++d;
+  }
+
+  // Collapse the reduced axis
+  reduced.dim(axis) = 1;
+
+  return reduced;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute a TensorSoftmax node
+ *
+ * The input node must already carry annotated data in the Tensor domain, and
+ * the softmax axis must be smaller than the rank of the input; both are checked
+ * through validate(). Only FLOAT32 input is implemented; any other data type
+ * raises std::runtime_error.
+ *
+ * For every slice along the axis the result is
+ *   exp(x - max) / sum(exp(x - max))
+ * where 'max' is the largest element of that slice. Subtracting the maximum
+ * leaves the mathematical result unchanged but keeps exp() from overflowing
+ * for large inputs (which would otherwise produce inf / inf = NaN).
+ *
+ * The node is annotated with the result and with the domain of its input.
+ */
+void NodeExecution::execute(loco::TensorSoftmax *softmax)
+{
+  auto input_data = annot_data(softmax->input());
+
+  validate(input_data, "Input not ready");
+  validate(annot_domain(softmax->input()) == loco::Domain::Tensor,
+           "Input domain of TensorSoftmax is not Tensor");
+
+  std::unique_ptr<NodeData> softmax_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      const uint32_t axis = softmax->axis();
+
+      auto *input_shape = input_data->shape();
+
+      // reduce_shape()/reduce_index() access position 'axis' unconditionally
+      validate(axis < input_shape->rank(), "Axis of TensorSoftmax is out of range");
+
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto softmax_buf = make_buffer<float, LexicalLayout>(*input_shape);
+
+      auto reduce_sum_shape = reduce_shape(*input_shape, axis);
+      auto reduce_max_buf = make_buffer<float, LexicalLayout>(reduce_sum_shape);
+      auto reduce_sum_bufptr = make_buffer<float, LexicalLayout>(reduce_sum_shape);
+
+      // Seed each per-slice maximum with the slice's first element (coordinate 0
+      // along 'axis'); such an index is also a valid index of the reduced buffer.
+      for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+
+        if (index.at(axis) == 0)
+        {
+          reduce_max_buf.at(index) = input_bufptr->at(index);
+        }
+      }
+
+      // Per-slice maximum
+      for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        const auto r_index = reduce_index(index, axis);
+        const float value = input_bufptr->at(index);
+
+        if (value > reduce_max_buf.at(r_index))
+        {
+          reduce_max_buf.at(r_index) = value;
+        }
+      }
+
+      // Per-slice sum of shifted exponentials
+      // NOTE(review): relies on make_buffer returning zero-filled storage, as the
+      //               previous implementation did - confirm against Buffer.h
+      for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        const auto r_index = reduce_index(index, axis);
+
+        reduce_sum_bufptr.at(r_index) += exp(input_bufptr->at(index) - reduce_max_buf.at(r_index));
+      }
+
+      // Normalize
+      for (IndexEnumerator e{*input_shape}; e.valid(); e.advance())
+      {
+        const auto &index = e.current();
+        const auto r_index = reduce_index(index, axis);
+
+        softmax_buf.at(index) =
+            exp(input_bufptr->at(index) - reduce_max_buf.at(r_index)) / reduce_sum_bufptr.at(r_index);
+      }
+
+      softmax_data = make_data(softmax_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(softmax_data != nullptr);
+  annot_data(softmax, std::move(softmax_data));
+  annot_domain(softmax, annot_domain(softmax->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Softmax.test.cpp b/compiler/locomotiv/src/Node/Softmax.test.cpp
new file mode 100644
index 000000000..21d240275
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Softmax.test.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Softmax, f32)
+{
+  // Graph under test: Pull (2x2, FLOAT32) feeding TensorSoftmax over axis 1
+  auto graph = loco::make_graph();
+
+  auto pull_node = graph->nodes()->create<loco::Pull>();
+  pull_node->dtype(loco::DataType::FLOAT32);
+  pull_node->shape({2, 2});
+
+  auto softmax_node = graph->nodes()->create<loco::TensorSoftmax>();
+  softmax_node->input(pull_node);
+  softmax_node->axis(1);
+
+  // Both entries of a row are equal, so softmax over axis 1 must give 0.5 everywhere
+  auto input_buf = make_buffer<float, LexicalLayout>(Shape{2, 2});
+  input_buf.at(Index{0, 0}) = 1.1f;
+  input_buf.at(Index{0, 1}) = 1.1f;
+  input_buf.at(Index{1, 0}) = 3.3f;
+  input_buf.at(Index{1, 1}) = 3.3f;
+  locomotiv::annot_data(pull_node, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(pull_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(softmax_node);
+
+  // Check the annotated result: type, shape, values and domain
+  const auto expected_shape = Shape{2, 2};
+  auto result = locomotiv::annot_data(softmax_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), expected_shape);
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 2; ++col)
+    {
+      ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{row, col}), 0.5f);
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(softmax_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/Tanh.cpp b/compiler/locomotiv/src/Node/Tanh.cpp
new file mode 100644
index 000000000..78d329e7c
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Tanh.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include <cmath>
+
+namespace
+{
+
+/// Unary functor handed to eltwise_unary: maps a value to its hyperbolic tangent
+struct Func final : public locomotiv::UnaryFunc
+{
+  float apply(float value) const final
+  {
+    return std::tanh(value);
+  }
+};
+
+} // namespace
+
+namespace locomotiv
+{
+
+/// Execute Tanh by delegating to eltwise_unary with the tanh functor defined above
+void NodeExecution::execute(loco::Tanh *tanh)
+{
+  Func tanh_func;
+  eltwise_unary(tanh, tanh_func);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Tanh.test.cpp b/compiler/locomotiv/src/Node/Tanh.test.cpp
new file mode 100644
index 000000000..78c3a13ba
--- /dev/null
+++ b/compiler/locomotiv/src/Node/Tanh.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Tanh, f32)
+{
+  // Graph under test: Pull (3 elements, FLOAT32) feeding Tanh
+  auto graph = loco::make_graph();
+
+  auto pull_node = graph->nodes()->create<loco::Pull>();
+  pull_node->dtype(loco::DataType::FLOAT32);
+  pull_node->shape({3});
+
+  auto tanh_node = graph->nodes()->create<loco::Tanh>();
+  tanh_node->input(pull_node);
+
+  // Feed zero plus a positive/negative pair to the pull node
+  auto input_buf = make_buffer<float, LexicalLayout>(Shape{3});
+  input_buf.at(Index{0}) = 0.0f;
+  input_buf.at(Index{1}) = 1.0f;
+  input_buf.at(Index{2}) = -1.0f;
+  locomotiv::annot_data(pull_node, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(pull_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(tanh_node);
+
+  // Check the annotated result: type, shape, values and domain
+  auto result = locomotiv::annot_data(tanh_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape{3});
+
+  const float expected[] = {0.0f, 0.761594f, -0.761594f};
+  for (uint32_t i = 0; i < 3; ++i)
+  {
+    ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{i}), expected[i]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(tanh_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
new file mode 100644
index 000000000..010ca6821
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute TensorBroadcast: replicate the input along every axis defined in mapping()
+ *
+ * For an axis with a defined mapping the output extent is the mapped dimension and the input is
+ * always read at coordinate 0 on that axis; every other axis is copied through unchanged.
+ * Only FLOAT32 is implemented. The result is annotated with the Tensor domain.
+ *
+ * @throws std::runtime_error if the input has no data yet or its data type is not supported
+ */
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+  auto input_data = annot_data(tensor_broadcast->input());
+
+  // Without this guard an input that was never executed is dereferenced as a null pointer below
+  if (input_data == nullptr)
+  {
+    throw std::runtime_error("Input not ready");
+  }
+
+  // The mapping is consulted once per axis and once per output element, so fetch it once
+  auto mapping = tensor_broadcast->mapping();
+
+  // Calculate output shape
+  // TODO Reuse "ShapeInferenceService"
+  const Shape input_shape = *(input_data->shape());
+  const uint32_t rank = input_shape.rank();
+
+  Shape output_shape;
+  output_shape.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    if (mapping->defined(axis))
+    {
+      assert(input_shape.dim(axis) == 1); // Required by TensorBroadcast definition
+      output_shape.dim(axis) = mapping->dim(axis).value();
+    }
+    else
+    {
+      output_shape.dim(axis) = input_shape.dim(axis);
+    }
+  }
+
+  std::unique_ptr<NodeData> output_data = nullptr;
+
+  switch (input_data->dtype())
+  {
+    // TODO Use type-generic implementation!
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto output_buf = make_buffer<float, LexicalLayout>(output_shape);
+
+      for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
+      {
+        const auto &output_index = e.current();
+
+        // Broadcast axes always read the input at coordinate 0
+        Index input_index = output_index;
+        for (uint32_t axis = 0; axis < rank; ++axis)
+        {
+          if (mapping->defined(axis))
+          {
+            input_index.at(axis) = 0;
+          }
+        }
+
+        output_buf.at(output_index) = input_bufptr->at(input_index);
+      }
+
+      output_data = make_data(output_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("Not yet supported");
+  }
+
+  assert(output_data != nullptr);
+  annot_data(tensor_broadcast, std::move(output_data));
+  annot_domain(tensor_broadcast, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp
new file mode 100644
index 000000000..e8347d737
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_TensorBroadcast, f32)
+{
+  // Graph under test: Pull (1x1, FLOAT32) feeding TensorBroadcast that widens axis 0 to 2
+  auto graph = loco::make_graph();
+
+  auto pull_node = graph->nodes()->create<loco::Pull>();
+  pull_node->dtype(loco::DataType::FLOAT32);
+  pull_node->shape({1, 1});
+
+  auto broadcast_node = graph->nodes()->create<loco::TensorBroadcast>();
+  broadcast_node->input(pull_node);
+  broadcast_node->mapping()->dim(0) = 2;
+
+  // The single input element must show up in both output rows
+  auto input_buf = make_buffer<float, LexicalLayout>(Shape{1, 1});
+  input_buf.at(Index{0, 0}) = -1.0f;
+  locomotiv::annot_data(pull_node, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(pull_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(broadcast_node);
+
+  // Check the annotated result: type, shape, values and domain
+  auto result = locomotiv::annot_data(broadcast_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ((*(result->shape())), (Shape{2, 1}));
+
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{row, 0}), -1.0f);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(broadcast_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp
new file mode 100644
index 000000000..5097e55c6
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConcat.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute TensorConcat: join lhs and rhs along axis()
+ *
+ * Both operands must already carry data, share a data type and be annotated as Tensors.
+ * They must also have the same rank and identical extents on every axis except the concat
+ * axis. These shape requirements are checked with validate() rather than assert(), so that a
+ * build with NDEBUG cannot index the operand buffers with coordinates outside their shapes.
+ * Only FLOAT32 is implemented; any other data type raises std::runtime_error.
+ */
+void NodeExecution::execute(loco::TensorConcat *tensor_concat)
+{
+  auto lhs_data = annot_data(tensor_concat->lhs());
+  auto rhs_data = annot_data(tensor_concat->rhs());
+  auto axis = tensor_concat->axis();
+
+  validate(lhs_data && rhs_data, "Ingredient not ready");
+  validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype");
+
+  validate(annot_domain(tensor_concat->lhs()) == loco::Domain::Tensor &&
+             annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor,
+           "Some ingredients of TensorConcat is not Tensor");
+
+  // Calculate output shape
+  const Shape lhs_shape = *lhs_data->shape();
+  const Shape rhs_shape = *rhs_data->shape();
+  const uint32_t rank = lhs_shape.rank();
+
+  validate(rank == rhs_shape.rank(), "lhs and rhs of Concat should have same rank");
+  validate(axis < rank, "axis of Concat is out of range");
+
+  Shape concat_shape;
+  concat_shape.resize(rank);
+  for (uint32_t index = 0; index < rank; ++index)
+  {
+    if (index == axis)
+    {
+      concat_shape.dim(index) = lhs_shape.dim(index) + rhs_shape.dim(index);
+    }
+    else
+    {
+      validate(lhs_shape.dim(index) == rhs_shape.dim(index),
+               "lhs and rhs of Concat should have same dimension except on concat axis");
+      concat_shape.dim(index) = lhs_shape.dim(index);
+    }
+  }
+
+  // Output coordinates below this bound along the concat axis come from LHS, the rest from RHS
+  const auto left_dim_size = lhs_shape.dim(axis);
+
+  // Copy data from two inputs LHS and RHS to Concat
+  std::unique_ptr<NodeData> concat_data = nullptr;
+  switch (lhs_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto lhs_bufptr = lhs_data->as_f32_bufptr();
+      auto rhs_bufptr = rhs_data->as_f32_bufptr();
+      auto concat_buf = make_buffer<float, LexicalLayout>(concat_shape);
+
+      for (IndexEnumerator e{concat_shape}; e.valid(); e.advance())
+      {
+        const auto &e_index = e.current();
+
+        if (e_index.at(axis) < left_dim_size)
+        {
+          // Left index is same as output index
+          concat_buf.at(e_index) = lhs_bufptr->at(e_index);
+        }
+        else
+        {
+          // Adjust right index to valid range
+          Index r_index = e_index;
+          r_index.at(axis) -= left_dim_size;
+          concat_buf.at(e_index) = rhs_bufptr->at(r_index);
+        }
+      }
+
+      concat_data = make_data(concat_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(concat_data != nullptr);
+  annot_data(tensor_concat, std::move(concat_data));
+  annot_domain(tensor_concat, loco::Domain::Tensor);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConcat.test.cpp b/compiler/locomotiv/src/Node/TensorConcat.test.cpp
new file mode 100644
index 000000000..d71b51524
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConcat.test.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_TensorConcat, f32)
+{
+  // Graph under test: two Pull nodes (1x2, FLOAT32) concatenated along axis 0
+  auto graph = loco::make_graph();
+
+  auto lhs_node = graph->nodes()->create<loco::Pull>();
+  lhs_node->dtype(loco::DataType::FLOAT32);
+  lhs_node->shape({1, 2});
+
+  auto rhs_node = graph->nodes()->create<loco::Pull>();
+  rhs_node->dtype(loco::DataType::FLOAT32);
+  rhs_node->shape({1, 2});
+
+  auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+  concat_node->lhs(lhs_node);
+  concat_node->rhs(rhs_node);
+  concat_node->axis(0);
+
+  // Left operand
+  auto lhs_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+  lhs_buf.at(Index{0, 0}) = -1.0f;
+  lhs_buf.at(Index{0, 1}) = -2.0f;
+  locomotiv::annot_data(lhs_node, locomotiv::make_data(lhs_buf));
+  locomotiv::annot_domain(lhs_node, loco::Domain::Tensor);
+
+  // Right operand
+  auto rhs_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+  rhs_buf.at(Index{0, 0}) = 3.0f;
+  rhs_buf.at(Index{0, 1}) = 4.0f;
+  locomotiv::annot_data(rhs_node, locomotiv::make_data(rhs_buf));
+  locomotiv::annot_domain(rhs_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(concat_node);
+
+  // The left row must come first, followed by the right row
+  auto result = locomotiv::annot_data(concat_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ((*(result->shape())), (Shape{2, 2}));
+
+  const float expected[2][2] = {{-1.0f, -2.0f}, {3.0f, 4.0f}};
+  for (uint32_t row = 0; row < 2; ++row)
+  {
+    for (uint32_t col = 0; col < 2; ++col)
+    {
+      ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{row, col}), expected[row][col]);
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(concat_node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_TensorConcat, f32_2)
+{
+  // Graph under test: Pull (1x2) and Pull (3x2), both FLOAT32, concatenated along axis 0
+  auto graph = loco::make_graph();
+
+  auto lhs_node = graph->nodes()->create<loco::Pull>();
+  lhs_node->dtype(loco::DataType::FLOAT32);
+  lhs_node->shape({1, 2});
+
+  auto rhs_node = graph->nodes()->create<loco::Pull>();
+  rhs_node->dtype(loco::DataType::FLOAT32);
+  rhs_node->shape({3, 2});
+
+  auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+  concat_node->lhs(lhs_node);
+  concat_node->rhs(rhs_node);
+  concat_node->axis(0);
+
+  // Left operand: a single row
+  auto lhs_buf = make_buffer<float, LexicalLayout>(Shape{1, 2});
+  lhs_buf.at(Index{0, 0}) = -1.0f;
+  lhs_buf.at(Index{0, 1}) = -2.0f;
+  locomotiv::annot_data(lhs_node, locomotiv::make_data(lhs_buf));
+  locomotiv::annot_domain(lhs_node, loco::Domain::Tensor);
+
+  // Right operand: three rows
+  auto rhs_buf = make_buffer<float, LexicalLayout>(Shape{3, 2});
+  rhs_buf.at(Index{0, 0}) = 3.0f;
+  rhs_buf.at(Index{0, 1}) = 4.0f;
+  rhs_buf.at(Index{1, 0}) = -3.0f;
+  rhs_buf.at(Index{1, 1}) = -4.0f;
+  rhs_buf.at(Index{2, 0}) = 5.0f;
+  rhs_buf.at(Index{2, 1}) = 6.0f;
+  locomotiv::annot_data(rhs_node, locomotiv::make_data(rhs_buf));
+  locomotiv::annot_domain(rhs_node, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(concat_node);
+
+  // The left row must come first, followed by the three right rows in order
+  auto result = locomotiv::annot_data(concat_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ((*(result->shape())), (Shape{4, 2}));
+
+  const float expected[4][2] = {{-1.0f, -2.0f}, {3.0f, 4.0f}, {-3.0f, -4.0f}, {5.0f, 6.0f}};
+  for (uint32_t row = 0; row < 4; ++row)
+  {
+    for (uint32_t col = 0; col < 2; ++col)
+    {
+      ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{row, col}), expected[row][col]);
+    }
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(concat_node), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
new file mode 100644
index 000000000..989afaf94
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+namespace locomotiv
+{
+
+/**
+ * @brief Execute TensorConstantPad: surround the input with a constant value
+ *
+ * On every axis the output extent is the input extent plus padding()->front() and
+ * padding()->back(). The input is placed at an offset of front() on each axis and all
+ * remaining output elements are filled with the single value held by the constant() node.
+ *
+ * The input must carry data and be a Tensor, the padding rank must equal the input rank, and
+ * the constant must carry data of the same type with shape {1}. Only FLOAT32 is implemented;
+ * any other data type raises std::runtime_error. The result inherits the input's domain.
+ */
+void NodeExecution::execute(loco::TensorConstantPad *pad)
+{
+  auto input_data = annot_data(pad->input());
+  auto input_domain = annot_domain(pad->input());
+  validate(input_data, "Input not ready");
+  validate(input_domain == loco::Domain::Tensor, "Input domain of TensorConstantPad is not Tensor");
+
+  auto input_shape = input_data->shape();
+  const uint32_t input_rank = input_shape->rank();
+
+  auto padding = pad->padding();
+  validate(input_rank == padding->rank(), "input and padding should have same rank");
+
+  auto constant_node = pad->constant();
+  auto constant_data = annot_data(constant_node);
+  // Reject a constant that has no data yet instead of dereferencing a null pointer
+  validate(constant_data, "Constant not ready");
+  validate(constant_data->dtype() == input_data->dtype(),
+           "constant and input should have same data type");
+  validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1,
+           "constant should have one rank with one dimension at zero axis");
+
+  std::unique_ptr<NodeData> pad_data = nullptr;
+
+  // Tensor is padded by relocating its base.
+  // padded output index = input index + base index
+  Index base_index;
+  base_index.resize(input_rank);
+
+  // calculate output shape
+  Shape output_shape;
+  output_shape.resize(input_rank);
+
+  for (uint32_t axis = 0; axis < input_rank; axis++)
+  {
+    base_index.at(axis) = padding->front(axis);
+    output_shape.dim(axis) = input_shape->dim(axis) + padding->front(axis) + padding->back(axis);
+  }
+
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_buf = input_data->as_f32_bufptr();
+      auto constant_data_buf = constant_data->as_f32_bufptr();
+      const auto constant_value = constant_data_buf->at(Index{0});
+
+      auto output_buf = make_buffer<float, LexicalLayout>(output_shape);
+
+      // Walk the output once. The input enumerator moves forward only when the current output
+      // position is where the next pending input element belongs.
+      for (IndexEnumerator ie{*input_shape}, oe{output_shape}; oe.valid(); oe.advance())
+      {
+        const auto &input_index = ie.current();
+        const auto &output_index = oe.current();
+
+        if ((input_index + base_index) == output_index)
+        {
+          output_buf.at(output_index) = input_buf->at(input_index);
+          ie.advance();
+        }
+        else
+        {
+          output_buf.at(output_index) = constant_value;
+        }
+      }
+
+      pad_data = make_data(output_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(pad_data != nullptr);
+  annot_data(pad, std::move(pad_data));
+  annot_domain(pad, input_domain);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp
new file mode 100644
index 000000000..0f60c5f85
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+
+TEST(NodeExecution_Pad, tensor_constant_pad_4_dim)
+{
+  auto graph = loco::make_graph();
+
+  // Input: 1x2x2x1 FLOAT32 tensor holding 1..4
+  auto input_node = graph->nodes()->create<loco::Pull>();
+  input_node->dtype(loco::DataType::FLOAT32);
+  input_node->shape({1, 2, 2, 1});
+
+  auto input_buf = make_buffer<float, LexicalLayout>(Shape{1, 2, 2, 1});
+  input_buf.at(Index{0, 0, 0, 0}) = 1.0f;
+  input_buf.at(Index{0, 0, 1, 0}) = 2.0f;
+  input_buf.at(Index{0, 1, 0, 0}) = 3.0f;
+  input_buf.at(Index{0, 1, 1, 0}) = 4.0f;
+  locomotiv::annot_data(input_node, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(input_node, loco::Domain::Tensor);
+
+  // Fill value: a single 0.0f
+  auto fill_node = graph->nodes()->create<loco::ConstGen>();
+  fill_node->dtype(loco::DataType::FLOAT32);
+  fill_node->shape({1});
+
+  auto fill_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  fill_buf.at(Index{0}) = 0.0f;
+  locomotiv::annot_data(fill_node, locomotiv::make_data(fill_buf));
+  locomotiv::annot_domain(fill_node, loco::Domain::Tensor);
+
+  // Pad node: only axes 1 and 2 receive padding
+  auto pad_node = graph->nodes()->create<loco::TensorConstantPad>();
+  pad_node->input(input_node);
+  pad_node->constant(fill_node);
+
+  auto pad_spec = pad_node->padding();
+  pad_spec->rank(4);
+  pad_spec->front(0) = 0;
+  pad_spec->back(0) = 0;
+  pad_spec->front(1) = 3;
+  pad_spec->back(1) = 1;
+  pad_spec->front(2) = 1;
+  pad_spec->back(2) = 1;
+  pad_spec->front(3) = 0;
+  pad_spec->back(3) = 0;
+
+  locomotiv::NodeExecution::get().run(pad_node);
+
+  auto result = locomotiv::annot_data(pad_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape({1, 6, 4, 1}));
+
+  // Input values are shifted by the front padding (3 on axis 1, 1 on axis 2)
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 3, 1, 0}), 1.0f);
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 3, 2, 0}), 2.0f);
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 4, 1, 0}), 3.0f);
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 4, 2, 0}), 4.0f);
+  // A padded position holds the fill value
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 0, 0, 0}), 0.0f);
+
+  ASSERT_EQ(locomotiv::annot_domain(pad_node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Pad, tensor_constant_pad_1_dim)
+{
+  auto graph = loco::make_graph();
+
+  // Input: 3-element FLOAT32 vector {1, 5, 3}
+  auto input_node = graph->nodes()->create<loco::Pull>();
+  input_node->dtype(loco::DataType::FLOAT32);
+  input_node->shape({3});
+
+  auto input_buf = make_buffer<float, LexicalLayout>(Shape{3});
+  input_buf.at(Index{0}) = 1.0f;
+  input_buf.at(Index{1}) = 5.0f;
+  input_buf.at(Index{2}) = 3.0f;
+  locomotiv::annot_data(input_node, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(input_node, loco::Domain::Tensor);
+
+  // Fill value: a single 0.0f
+  auto fill_node = graph->nodes()->create<loco::ConstGen>();
+  fill_node->dtype(loco::DataType::FLOAT32);
+  fill_node->shape({1});
+
+  auto fill_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  fill_buf.at(Index{0}) = 0.0f;
+  locomotiv::annot_data(fill_node, locomotiv::make_data(fill_buf));
+  locomotiv::annot_domain(fill_node, loco::Domain::Tensor);
+
+  // Pad node: two elements in front, one behind
+  auto pad_node = graph->nodes()->create<loco::TensorConstantPad>();
+  pad_node->input(input_node);
+  pad_node->constant(fill_node);
+
+  auto pad_spec = pad_node->padding();
+  pad_spec->rank(1);
+  pad_spec->front(0) = 2;
+  pad_spec->back(0) = 1;
+
+  locomotiv::NodeExecution::get().run(pad_node);
+
+  auto result = locomotiv::annot_data(pad_node);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), Shape({6}));
+
+  const float expected[] = {0.0f, 0.0f, 1.0f, 5.0f, 3.0f, 0.0f};
+  for (uint32_t i = 0; i < 6; ++i)
+  {
+    ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{i}), expected[i]);
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(pad_node), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Pad, tensor_constant_pad_6_dim)
+{
+  auto g = loco::make_graph();
+
+  // Input: 2x1x3x2x1x2 FLOAT32 tensor filled with 1, 2, 3, ... in nested-loop order
+  auto inputTensor = g->nodes()->create<loco::Pull>();
+  inputTensor->dtype(loco::DataType::FLOAT32);
+  inputTensor->shape({2, 1, 3, 2, 1, 2});
+  auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{2, 1, 3, 2, 1, 2});
+  // The loop variables are the only indices needed; no outer declarations are required
+  float next_value = 1.0f;
+  for (uint32_t a = 0; a < 2; a++)
+  {
+    for (uint32_t b = 0; b < 1; b++)
+    {
+      for (uint32_t c = 0; c < 3; c++)
+      {
+        for (uint32_t d = 0; d < 2; d++)
+        {
+          for (uint32_t e = 0; e < 1; e++)
+          {
+            for (uint32_t f = 0; f < 2; f++)
+            {
+              inputTensor_buf.at(Index{a, b, c, d, e, f}) = next_value;
+              next_value += 1.0f;
+            }
+          }
+        }
+      }
+    }
+  }
+  auto inputTensor_data = locomotiv::make_data(inputTensor_buf);
+  locomotiv::annot_data(inputTensor, std::move(inputTensor_data));
+  locomotiv::annot_domain(inputTensor, loco::Domain::Tensor);
+
+  // Fill value: a single 0.0f
+  auto constant = g->nodes()->create<loco::ConstGen>();
+  constant->dtype(loco::DataType::FLOAT32);
+  constant->shape({1});
+  auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1});
+  constant_buf.at(Index{0}) = 0.0f;
+  auto constant_data = locomotiv::make_data(constant_buf);
+  locomotiv::annot_data(constant, std::move(constant_data));
+  locomotiv::annot_domain(constant, loco::Domain::Tensor);
+
+  auto pad = g->nodes()->create<loco::TensorConstantPad>();
+  pad->input(inputTensor);
+  pad->constant(constant);
+  auto padding = pad->padding();
+
+  // Axes 1 and 4 (extent 1) stay unpadded; the others get asymmetric padding
+  padding->rank(6);
+  padding->front(0) = 1;
+  padding->back(0) = 1;
+  padding->front(1) = 0;
+  padding->back(1) = 0;
+  padding->front(2) = 1;
+  padding->back(2) = 2;
+  padding->front(3) = 2;
+  padding->back(3) = 1;
+  padding->front(4) = 0;
+  padding->back(4) = 0;
+  padding->front(5) = 1;
+  padding->back(5) = 2;
+
+  locomotiv::NodeExecution::get().run(pad);
+
+  auto pad_data = locomotiv::annot_data(pad);
+  ASSERT_NE(pad_data, nullptr);
+  ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(pad_data->shape()), Shape({4, 1, 6, 5, 1, 5}));
+
+  // The first ten input values must appear shifted by the front padding of each axis
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 1}), 1.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 2}), 2.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 1}), 3.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 2}), 4.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 1}), 5.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 2}), 6.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 1}), 7.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 2}), 8.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 1}), 9.0f);
+  ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 2}), 10.0f);
+
+  ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp
new file mode 100644
index 000000000..fae7a75c5
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Buffer;
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+// Projects 'index' onto the reduced tensor: coordinates on axes listed in 'axes' become 0
+Index reduced_index(const Index &index, const loco::TensorAxisSet &axes)
+{
+  const auto rank = index.rank();
+  Index projected;
+  projected.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    projected.at(axis) = axes.defined(axis) ? 0 : index.at(axis);
+  return projected;
+}
+
+// Shape of the reduction result: rank is kept, every axis listed in 'axes' shrinks to extent 1
+Shape reduced_shape(const Shape &shape, const loco::TensorAxisSet &axes)
+{
+  const auto rank = shape.rank();
+  Shape shrunk;
+  shrunk.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    shrunk.dim(axis) = axes.defined(axis) ? 1 : shape.dim(axis);
+  return shrunk;
+}
+
+} // namespace
+
+namespace
+{
+
+// Primary template: selected for every ReduceFunc that has no specialization
+template <typename T, loco::ReduceFunc F> struct ReduceFunction
+{
+  // Unconditionally rejects the request; lhs, rhs and axes are never inspected
+  static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
+  { throw std::runtime_error{"Not supported ReduceFunc type"}; }
+};
+
+// Mean reduction: sum the inputs of each output slot, then divide by the inputs-per-slot count
+template <typename T> struct ReduceFunction<T, loco::ReduceFunc::Mean>
+{
+  // NOTE(review): lhs is only ever updated with += and /=, so it presumably arrives zero-filled
+  static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes)
+  {
+    // Accumulate each input element into the output slot it collapses onto
+    for (IndexEnumerator src{rhs.shape()}; src.valid(); src.advance())
+    {
+      const auto &from = src.current();
+      lhs.at(reduced_index(from, axes)) += rhs.at(from);
+    }
+
+    // Number of inputs per slot = product of the extents of the reduced axes
+    uint32_t per_slot = 1;
+    for (uint32_t axis = 0; axis < rhs.shape().rank(); ++axis)
+      if (axes.defined(axis))
+        per_slot *= rhs.shape().dim(axis);
+
+    const T divisor = static_cast<T>(per_slot);
+    for (IndexEnumerator dst{lhs.shape()}; dst.valid(); dst.advance())
+      lhs.at(dst.current()) /= divisor;
+  }
+};
+
+// Reduces rhs into lhs over node.axes() using the ReduceFunction that matches node.func()
+template <typename T>
+void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
+{
+  switch (node.func())
+  {
+    case loco::ReduceFunc::Mean:
+      ReduceFunction<T, loco::ReduceFunc::Mean>::apply(lhs, rhs, *node.axes());
+      break;
+    // TODO Support more ReduceFunc type
+    default: // fail loudly; returning here would hand back an lhs that was never reduced
+      throw std::runtime_error("Not supported ReduceFunc type");
+  }
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes TensorReduce: reduces the input's annotated data over node->axes() and annotates
+// 'node' with the result and the input's domain. Only FLOAT32 input is implemented.
+void NodeExecution::execute(loco::TensorReduce *node)
+{
+  auto input_data = annot_data(node->input());
+
+  // Validate first: input_data is dereferenced below and is null-checked only here
+  validate(input_data, "Input not ready");
+  validate(annot_domain(node->input()) == loco::Domain::Tensor,
+           "Input domain of TensorReduce is not Tensor");
+
+  std::unique_ptr<NodeData> reduce_data = nullptr;
+  Shape r_shape = reduced_shape(*input_data->shape(), *node->axes());
+  switch (input_data->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto input_bufptr = input_data->as_f32_bufptr();
+      auto reduce_buf = make_buffer<float, LexicalLayout>(r_shape);
+      apply(reduce_buf, *input_bufptr, *node);
+      reduce_data = make_data(reduce_buf);
+      break;
+    }
+    default:
+      throw std::runtime_error("NYI for this DataType");
+  }
+
+  assert(reduce_data != nullptr);
+  annot_data(node, std::move(reduce_data));
+  annot_domain(node, annot_domain(node->input()));
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorReduce.test.cpp b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
new file mode 100644
index 000000000..68398cacd
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TensorReduce.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <gtest/gtest.h>
+
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_0)
+{
+  // Graph under test: Pull (1x2x2, FLOAT32) -> TensorReduce(Mean) over axes 0 and 1
+  auto graph = loco::make_graph();
+  auto pull = graph->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({1, 2, 2});
+  auto reduce = graph->nodes()->create<loco::TensorReduce>();
+  reduce->input(pull);
+  reduce->axes()->insert(0);
+  reduce->axes()->insert(1);
+  reduce->func(loco::ReduceFunc::Mean);
+
+  // Annotate the pull node with its input values and domain
+  auto input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+  input_buf.at(Index{0, 0, 0}) = 1.1f;
+  input_buf.at(Index{0, 0, 1}) = 2.2f;
+  input_buf.at(Index{0, 1, 0}) = 5.5f;
+  input_buf.at(Index{0, 1, 1}) = 6.6f;
+  locomotiv::annot_data(pull, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(reduce);
+
+  const Shape expected_shape{1, 1, 2};
+  auto result = locomotiv::annot_data(reduce);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), expected_shape);
+  const auto result_buf = result->as_f32_bufptr();
+  ASSERT_FLOAT_EQ(result_buf->at(Index{0, 0, 0}), 3.3f); // (1.1 + 5.5) / 2
+  ASSERT_FLOAT_EQ(result_buf->at(Index{0, 0, 1}), 4.4f); // (2.2 + 6.6) / 2
+
+  ASSERT_EQ(locomotiv::annot_domain(reduce), loco::Domain::Tensor);
+}
+
+TEST(NodeExecution_Fixed_Reduce_Mean, f32_1)
+{
+  // Graph under test: Pull (1x2x2, FLOAT32) -> TensorReduce(Mean) over axes 1 and 2
+  auto graph = loco::make_graph();
+  auto pull = graph->nodes()->create<loco::Pull>();
+  pull->dtype(loco::DataType::FLOAT32);
+  pull->shape({1, 2, 2});
+  auto reduce = graph->nodes()->create<loco::TensorReduce>();
+  reduce->input(pull);
+  reduce->axes()->insert(1);
+  reduce->axes()->insert(2);
+  reduce->func(loco::ReduceFunc::Mean);
+
+  // Annotate the pull node with its input values and domain
+  auto input_buf = make_buffer<float, LexicalLayout>({1, 2, 2});
+  input_buf.at(Index{0, 0, 0}) = 1.1f;
+  input_buf.at(Index{0, 0, 1}) = 2.2f;
+  input_buf.at(Index{0, 1, 0}) = 5.5f;
+  input_buf.at(Index{0, 1, 1}) = 6.6f;
+  locomotiv::annot_data(pull, locomotiv::make_data(input_buf));
+  locomotiv::annot_domain(pull, loco::Domain::Tensor);
+
+  locomotiv::NodeExecution::get().run(reduce);
+
+  const Shape expected_shape{1, 1, 1};
+  auto result = locomotiv::annot_data(reduce);
+  ASSERT_NE(result, nullptr);
+  ASSERT_EQ(result->dtype(), loco::DataType::FLOAT32);
+  ASSERT_EQ(*(result->shape()), expected_shape);
+  ASSERT_FLOAT_EQ(result->as_f32_bufptr()->at(Index{0, 0, 0}), 3.85f); // 15.4 / 4
+
+  ASSERT_EQ(locomotiv::annot_domain(reduce), loco::Domain::Tensor);
+}
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
new file mode 100644
index 000000000..3ea4f071d
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Output extent of a transposed convolution along one spatial axis.
+ * @param whole_pad Sum of front and rear pad
+ */
+inline uint32_t compute_transposed_out_size(uint32_t input_size, uint32_t whole_pad,
+                                            uint32_t filter_size, uint32_t stride)
+{
+  const uint32_t unpadded_size = stride * (input_size - 1) + filter_size;
+  return unpadded_size - whole_pad; // unsigned: wraps if whole_pad exceeds unpadded_size
+}
+
+/**
+ * @brief Calculates TransposedConv2D by scattering every input element over a filter window
+ * @note Both input_buf and filter_buf have NHWC format; the result has NHWC format as well
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
+ const Buffer<IFM_T> *input_buf, const Buffer<FIL_T> *filter_buf)
+{
+ auto input_shape = input_buf->shape();
+ auto filter_shape = filter_buf->shape();
+
+ locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
+ locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
+ locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
+ filter_shape.dim(3) /* depth of filter */,
+ "channel value mismatch");
+
+ const uint32_t input_height = input_shape.dim(1);
+ const uint32_t input_width = input_shape.dim(2);
+
+ const uint32_t filter_height = filter_shape.dim(1);
+ const uint32_t filter_width = filter_shape.dim(2);
+
+ const uint32_t stride_width = tr_conv2d->stride()->horizontal();
+ const uint32_t stride_height = tr_conv2d->stride()->vertical();
+
+ const uint32_t pad_top = tr_conv2d->pad()->top();
+ const uint32_t pad_bottom = tr_conv2d->pad()->bottom();
+
+ const uint32_t pad_left = tr_conv2d->pad()->left();
+ const uint32_t pad_right = tr_conv2d->pad()->right();
+
+ // TODO Support dilations
+
+ const uint32_t output_height =
+ compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
+ const uint32_t output_width =
+ compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);
+
+ const uint32_t batches = input_shape.dim(0);
+ const uint32_t input_depth = input_shape.dim(3);
+ const uint32_t output_depth = filter_shape.dim(0); // count of filter
+
+ Shape output_shape{batches, output_height, output_width, output_depth};
+ auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
+ // Zero the output first: the loops below only ever accumulate into it with +=
+ for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
+ {
+ const auto &index = e.current();
+ output_buf.at(index) = static_cast<RET_T>(0);
+ }
+
+ // Loop through input elements one at a time; each one contributes to a filter-sized window
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (uint32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ // Loop through the output elements it will influence, starting from this origin
+ const int out_x_origin = (in_x * stride_width) - pad_left;
+ const int out_y_origin = (in_y * stride_height) - pad_top;
+ for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ // Compute output element location
+ const int out_x = out_x_origin + filter_x;
+ const int out_y = out_y_origin + filter_y;
+ // We cannot accumulate out of bounds: drop contributions that fall outside the output
+ if ((out_x >= 0) && ((unsigned)out_x < output_width) && (out_y >= 0) &&
+ ((unsigned)out_y < output_height))
+ {
+ auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
+ auto filter_value =
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
+ input_value * filter_value;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+// Executes TransposedConv2D on the annotated ifm/ker data and annotates the node with the result
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+{
+  auto ifm_data = annot_data(tr_conv2d->ifm());
+  auto ker_data = annot_data(tr_conv2d->ker());
+
+  // Preconditions: both operands carry rank-4 data ...
+  validate(ifm_data, "Can't find input data of TransposedConv2D");
+  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+  validate(ker_data, "Can't find kernel data of TransposedConv2D");
+  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+  // ... annotated with the Feature and Filter domains respectively
+  validate(annot_domain(tr_conv2d->ifm()) == loco::Domain::Feature,
+           "IFM of TransposedConv2D is not feature");
+  validate(annot_domain(tr_conv2d->ker()) == loco::Domain::Filter,
+           "Kernel of TransposedConv2D is not filter");
+
+  // Only the FLOAT32 x FLOAT32 combination is implemented
+  const bool all_f32 = ifm_data->dtype() == loco::DataType::FLOAT32 &&
+                       ker_data->dtype() == loco::DataType::FLOAT32;
+  if (!all_f32)
+    throw std::runtime_error("NYI for these DataTypes");
+
+  auto ifm_buf = ifm_data->as_f32_bufptr();
+  auto ker_buf = ker_data->as_f32_bufptr();
+  auto ofm_buf = calc_tr_conv2D<float, float, float>(tr_conv2d, ifm_buf, ker_buf);
+
+  std::unique_ptr<NodeData> ofm_data = make_data(ofm_buf);
+  assert(ofm_data != nullptr);
+
+  // Publish the result on the node, tagged with the Feature domain
+  annot_data(tr_conv2d, std::move(ofm_data));
+  annot_domain(tr_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
new file mode 100644
index 000000000..bd955a06b
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+// Builds FeatureEncode/FilterEncode nodes pre-annotated with 'ifm' and 'ker', runs a
+// TransposedConv2D over them with the given stride and pad, and checks the result against
+// 'expected_ofm' (dtype, shape, every element, and output domain).
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  using nncc::core::ADT::tensor::IndexEnumerator;
+
+  auto graph = loco::make_graph();
+
+  // Input feature map: copy 'ifm' into a buffer and attach it to a FeatureEncode node
+  auto ifm_node = graph->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_view = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+      ifm_buf.at(e.current()) = ifm_view.at(e.current());
+
+    locomotiv::annot_data(ifm_node, locomotiv::make_data(ifm_buf));
+    locomotiv::annot_domain(ifm_node, loco::Domain::Feature);
+  }
+
+  // Kernel: copy 'ker' into a buffer and attach it to a FilterEncode node
+  auto ker_node = graph->nodes()->create<loco::FilterEncode>();
+  {
+    auto ker_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_view = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+      ker_buf.at(e.current()) = ker_view.at(e.current());
+
+    locomotiv::annot_data(ker_node, locomotiv::make_data(ker_buf));
+    locomotiv::annot_domain(ker_node, loco::Domain::Filter);
+  }
+
+  // Node under test, wired to the two encoders
+  auto tr_conv2d = graph->nodes()->create<loco::TransposedConv2D>();
+  tr_conv2d->ifm(ifm_node);
+  tr_conv2d->ker(ker_node);
+
+  auto stride = tr_conv2d->stride();
+  stride->vertical(stride_v);
+  stride->horizontal(stride_h);
+
+  auto pad = tr_conv2d->pad();
+  pad->top(pad_top);
+  pad->bottom(pad_bottom);
+  pad->left(pad_left);
+  pad->right(pad_right);
+
+  // Run the interpreter on that single node
+  locomotiv::NodeExecution::get().run(tr_conv2d);
+
+  // The node must now carry FLOAT32 data of the expected shape
+  auto actual = locomotiv::annot_data(tr_conv2d);
+  ASSERT_NE(actual, nullptr);
+  ASSERT_TRUE(actual->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(actual->shape()) == ofm_shape);
+
+  // Element-wise comparison against the expected output
+  auto expected =
+    make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &pos = e.current();
+    ASSERT_FLOAT_EQ(actual->as_f32_bufptr()->at(pos), expected.at(pos));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(tr_conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/*
+ifm = tf.constant(1.1, shape = [1, 2, 2, 4])
+ker = tf.constant(2.2, shape = [3, 3, 2, 4])
+tr_conv = tf.nn.conv2d_transpose(ifm, ker, output_shape = (1, 5, 5, 2), strides = [1, 2, 2, 1], padding = "VALID")
+
+with tf.Session() as session:
+ tr_conv_data = session.run(tr_conv)
+ */
+TEST(NodeExecution_TransposedConv2D, f32)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  float ifm[1 * 2 * 2 * 4]; // every element 1.1, like the tf.constant() ifm in the snippet above
+  for (float &value : ifm)
+    value = 1.1;
+
+  float ker[2 * 3 * 3 * 4]; // NHWC
+  for (float &value : ker)
+    value = 2.2;
+
+  float ofm[1 * 5 * 5 * 2] = {9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+                              9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+                              19.36, 19.36, 19.36, 19.36, 38.72, 38.72, 19.36, 19.36, 19.36, 19.36,
+                              9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68,
+                              9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68};
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 2, 2, 4}, Shape{2, 3, 3, 4}, Shape{1, 5, 5, 2}, // shapes of ifm, ker, ofm
+           2, 2 // stride
+           );
+}
+// clang-format on