diff options
Diffstat (limited to 'compiler/locomotiv/src/Node')
64 files changed, 7170 insertions, 0 deletions
diff --git a/compiler/locomotiv/src/Node/AvgPool2D.cpp b/compiler/locomotiv/src/Node/AvgPool2D.cpp new file mode 100644 index 000000000..ad603badf --- /dev/null +++ b/compiler/locomotiv/src/Node/AvgPool2D.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> +#include <stdexcept> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Compute 1D output size based on given 1D arguments. 
+ * + * @param whole_pad Sum of front and back pad + */ +inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size, + uint32_t stride) +{ + assert((image_size + whole_pad - filter_size) % stride == 0); + return (image_size + whole_pad - filter_size) / stride + 1; +} + +template <typename T> +nncc::core::ADT::tensor::Buffer<T> avgPool2D(const loco::AvgPool2D *avgpool2d, + const Buffer<T> *ifm_buf) +{ + assert(avgpool2d->convention() == loco::AvgPool2D::Convention::Valid || + avgpool2d->convention() == loco::AvgPool2D::Convention::Full); + + auto ifm_shape = ifm_buf->shape(); + + const uint32_t batches = ifm_shape.dim(0); + const uint32_t depth = ifm_shape.dim(3); + + const uint32_t ifm_height = ifm_shape.dim(1); + const uint32_t ifm_width = ifm_shape.dim(2); + + const uint32_t window_height = avgpool2d->window()->vertical(); + const uint32_t window_width = avgpool2d->window()->horizontal(); + + const uint32_t stride_height = avgpool2d->stride()->vertical(); + const uint32_t stride_width = avgpool2d->stride()->horizontal(); + + const uint32_t pad_top = avgpool2d->pad()->top(); + const uint32_t pad_bottom = avgpool2d->pad()->bottom(); + + const uint32_t pad_left = avgpool2d->pad()->left(); + const uint32_t pad_right = avgpool2d->pad()->right(); + + const uint32_t output_height = + compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height); + const uint32_t output_width = + compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width); + + // prepare output buffer + Shape output_shape{batches, output_height, output_width, depth}; + auto output_buf = make_buffer<T, LexicalLayout>(output_shape); + + for (uint32_t batch = 0; batch < batches; ++batch) + { + for (uint32_t out_y = 0; out_y < output_height; ++out_y) + { + for (uint32_t out_x = 0; out_x < output_width; ++out_x) + { + for (uint32_t channel = 0; channel < depth; ++channel) + { + const int in_x_origin = (out_x * stride_width) - pad_left; + 
const int in_y_origin = (out_y * stride_height) - pad_top; + + uint32_t f_x0, f_x1, f_y0, f_y1; + if (avgpool2d->convention() == loco::AvgPool2D::Convention::Valid) + { + f_x0 = std::max(0, -in_x_origin); + f_x1 = std::min(window_width, ifm_width - in_x_origin); + f_y0 = std::max(0, -in_y_origin); + f_y1 = std::min(window_height, ifm_height - in_y_origin); + } + else + { + throw std::runtime_error("TODO support AvgPool2D::Convention::Full"); + } + const uint32_t filter_x_start = f_x0; + const uint32_t filter_x_end = f_x1; + + const uint32_t filter_y_start = f_y0; + const uint32_t filter_y_end = f_y1; + + T total = 0; + uint32_t filter_ele_count = 0; + + for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const uint32_t in_x = in_x_origin + filter_x; + const uint32_t in_y = in_y_origin + filter_y; + total += ifm_buf->at(Index({batch, in_y, in_x, channel})); + filter_ele_count++; + } + } + + assert(filter_ele_count > 0); + output_buf.at(Index({batch, out_y, out_x, channel})) = total / filter_ele_count; + } + } + } + } + + return output_buf; +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::AvgPool2D *avgpool2d) +{ + auto ifm_data = annot_data(avgpool2d->ifm()); + + validate(ifm_data, "Can't find input data of AvgPool2D"); + validate(ifm_data->shape()->rank() == 4, "IFM rank should be 4"); + validate(annot_domain(avgpool2d->ifm()) == loco::Domain::Feature, + "ifm of AvgPool2D is not Feature"); + + std::unique_ptr<NodeData> avgpool2d_data = nullptr; + + switch (ifm_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto ifm_buf = ifm_data->as_f32_bufptr(); + + auto avgpool2d_buf = avgPool2D<float>(avgpool2d, ifm_buf); + + avgpool2d_data = make_data(avgpool2d_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(avgpool2d_data != nullptr); + + annot_data(avgpool2d, 
std::move(avgpool2d_data)); + annot_domain(avgpool2d, loco::Domain::Feature); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/AvgPool2D.test.cpp b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp new file mode 100644 index 000000000..89e10a35e --- /dev/null +++ b/compiler/locomotiv/src/Node/AvgPool2D.test.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape, + const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h, + const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top, + const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right) +{ + // Let's make FeatureEncode-AvgPool2D graph + auto g = loco::make_graph(); + auto enc = 
g->nodes()->create<loco::FeatureEncode>(); + + // Fill output data of FeatureEncode from ifm + auto enc_buf = make_buffer<float, LexicalLayout>(ifm_shape); + + auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + enc_buf.at(ind) = ifm_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(enc_buf); + locomotiv::annot_data(enc, std::move(enc_data)); + locomotiv::annot_domain(enc, loco::Domain::Feature); + + // build TF AvgPool2D + auto avgpool2d = g->nodes()->create<loco::AvgPool2D>(); + avgpool2d->ifm(enc); + avgpool2d->convention(loco::AvgPool2D::Convention::Valid); + avgpool2d->window()->vertical(window_v); + avgpool2d->window()->horizontal(window_h); + avgpool2d->stride()->vertical(stride_v); + avgpool2d->stride()->horizontal(stride_h); + avgpool2d->pad()->top(pad_top); + avgpool2d->pad()->bottom(pad_bottom); + avgpool2d->pad()->left(pad_left); + avgpool2d->pad()->right(pad_right); + + // run interpreter + locomotiv::NodeExecution::get().run(avgpool2d); + + // get result of calculation + auto avgpool2d_data = locomotiv::annot_data(avgpool2d); + + // check the result + ASSERT_NE(avgpool2d_data, nullptr); + ASSERT_TRUE(avgpool2d_data->dtype() == loco::DataType::FLOAT32); + ASSERT_TRUE(*(avgpool2d_data->shape()) == ofm_shape); + + auto ofm_overlay = + make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ASSERT_FLOAT_EQ(avgpool2d_data->as_f32_bufptr()->at(ind), ofm_overlay.at(ind)); + } + + ASSERT_EQ(locomotiv::annot_domain(avgpool2d), loco::Domain::Feature); +} + +} // namespace + +// clang-format off +/* ifm and ofm are from the code below: +import tensorflow as tf + +value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261], [-1.1777412]], 
+ [[1.4411974], [0.01408334], [0.06958964], [-0.08663343]], + [[1.3424183], [-0.89015573], [0.2520576], [0.04843695]], + [[-1.6668711], [-0.02187406], [1.9362065], [1.3341236]]]]) +avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding= 'VALID', + data_format="NHWC") +with tf.Session() as sess: + print(sess.run(avgpool)) +*/ +TEST(NodeExecution_AvgPool2D, f32_1x4x4x1_calculation) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + -0.281157, -1.0601869, -0.622261, -1.1777412, + 1.4411974, 0.01408334, 0.06958964, -0.08663343, + 1.3424183, -0.89015573, 0.2520576, 0.04843695, + -1.6668711, -0.02187406, 1.9362065, 1.3341236 + }; + + const float ofm[] = + { + 0.02848421, -0.45426148, + -0.30912063, 0.89270616 + }; + + run_test(ifm, ofm, + Shape{1, 4, 4, 1}, Shape{1, 2, 2, 1}, // input shape , output shape + 2, 2, // kernel + 2, 2, // stride + 0, 0, 0, 0 // padding + ); +} +// clang-format on + +// clang-format off +/* ifm and ofm are from the code below: +import tensorflow as tf + +value = tf.constant([[[[-0.281157], [-1.0601869], [-0.622261]], + [[1.4411974], [0.01408334], [0.06958964]], + [[1.3424183], [-0.89015573], [0.2520576]]]]) +avgpool = tf.nn.avg_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'SAME', + data_format="NHWC") +with tf.Session() as sess: + print(sess.run(avgpool)) +*/ +TEST(NodeExecution_AvgPool2D, f32_1x3x3x1_calculation) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + -0.281157, -1.0601869, -0.622261, + 1.4411974, 0.01408334, 0.06958964, + 1.3424183, -0.89015573, 0.2520576 + }; + + const float ofm[] = + { + 0.02848421, -0.39969373, -0.2763357, + 0.4768858, -0.13860628, 0.16082363, + 0.22613129, -0.31904906, 0.2520576 + }; + + run_test(ifm, ofm, + Shape{1, 3, 3, 1}, Shape{1, 3, 3, 1}, // input shape , output shape + 2, 2, // kernel + 1, 1, // stride + 0, 1, 0, 1 // padding + ); +} +// clang-format on diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp 
b/compiler/locomotiv/src/Node/BiasAdd.cpp new file mode 100644 index 000000000..0724fb728 --- /dev/null +++ b/compiler/locomotiv/src/Node/BiasAdd.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +#include <cassert> +#include <stdexcept> + +namespace +{ +using locomotiv::NodeData; + +std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_data, + uint32_t axis); + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add) +{ + auto input_data = locomotiv::annot_data(bias_add->value()); + auto bias_data = locomotiv::annot_data(bias_add->bias()); + + validate(input_data && bias_data, "Input not ready"); + validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Tensor && + locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias, + "Wrong input domain"); + + std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 
bias_add->axis()); + + assert(bias_add_data != nullptr); + annot_data(bias_add, std::move(bias_add_data)); + annot_domain(bias_add, annot_domain(bias_add->value())); +} + +void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add) +{ + auto input_data = locomotiv::annot_data(bias_add->value()); + auto bias_data = locomotiv::annot_data(bias_add->bias()); + + validate(input_data && bias_data, "Input not ready"); + validate(locomotiv::annot_domain(bias_add->value()) == loco::Domain::Feature && + locomotiv::annot_domain(bias_add->bias()) == loco::Domain::Bias, + "Wrong input domain"); + + std::unique_ptr<NodeData> bias_add_data = calc(input_data, bias_data, 3); + + assert(bias_add_data != nullptr); + annot_data(bias_add, std::move(bias_add_data)); + annot_domain(bias_add, loco::Domain::Feature); +} + +} // namespace locomotiv + +namespace +{ +using locomotiv::NodeData; +using locomotiv::validate; +using locomotiv::make_data; + +std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_data, uint32_t axis) +{ + validate(input_data->shape()->dim(axis) == bias_data->shape()->dim(0), "Bias size mismatch"); + + std::unique_ptr<NodeData> bias_add_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + auto bias_bufptr = bias_data->as_f32_bufptr(); + auto bias_add_buf = make_buffer<float, LexicalLayout>(*input_data->shape()); + + auto *shape = input_data->shape(); + + for (IndexEnumerator e{*shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + nncc::core::ADT::tensor::Index bias_index({index.at(axis)}); + bias_add_buf.at(index) = input_bufptr->at(index) + bias_bufptr->at(bias_index); + } + + bias_add_data = make_data(bias_add_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + return bias_add_data; +} + +} // namespace diff --git a/compiler/locomotiv/src/Node/BiasAdd.test.cpp 
b/compiler/locomotiv/src/Node/BiasAdd.test.cpp new file mode 100644 index 000000000..0ca826673 --- /dev/null +++ b/compiler/locomotiv/src/Node/BiasAdd.test.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + + inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) + bias = tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32) + out = tf.nn.bias_add(inp, bias) + + with tf.Session() as sess: + print(sess.run(out)) + */ + +TEST(NodeExecution_TensorBiasAdd, f32) +{ + float in_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float bias_val[] = {1.1, 2.1}; + float out_val[] = {2.1, 4.1, 4.1, 6.1, 6.1, 8.1, 8.1, 10.1, 10.1, + 12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1}; + + 
// make BiasAdd(Pull, Const) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp = g->nodes()->create<loco::Pull>(); + { + inp->dtype(loco::DataType::FLOAT32); + inp->shape({1, 3, 3, 2}); + } + + auto bias = g->nodes()->create<loco::BiasEncode>(); + { + // nothing to do + } + + auto bias_add = g->nodes()->create<loco::BiasAdd<loco::Domain::Tensor>>(); + { + bias_add->value(inp); + bias_add->bias(bias); + bias_add->axis(3); // axis(3) means C in NHWC + } + + // Make and assign data to pull node + auto inp_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_buf.shape()}; e.valid(); e.advance()) + { + inp_buf.at(e.current()) = in_val[n++]; + } + } + + auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2}); + { + int n = 0; + for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance()) + { + bias_buf.at(e.current()) = bias_val[n++]; + } + } + + auto inp_data = locomotiv::make_data(inp_buf); + locomotiv::annot_data(inp, std::move(inp_data)); + locomotiv::annot_domain(inp, loco::Domain::Tensor); + + auto bias_data = locomotiv::make_data(bias_buf); + locomotiv::annot_data(bias, std::move(bias_data)); + locomotiv::annot_domain(bias, loco::Domain::Bias); + + locomotiv::NodeExecution::get().run(bias_add); + + auto bias_add_data = locomotiv::annot_data(bias_add); + + // comparing the result + ASSERT_NE(bias_add_data, nullptr); + ASSERT_EQ(bias_add_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(bias_add_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(bias_add_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(bias_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(bias_add), loco::Domain::Tensor); +} + +/* +test case generated from the following: + + inp = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) + bias = 
tf.constant([1.1, 2.1], shape=[2], dtype=tf.float32) + out = tf.nn.bias_add(inp, bias) + + with tf.Session() as sess: + print(sess.run(out)) + */ + +TEST(NodeExecution_FeatureBiasAdd, f32) +{ + float in_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float bias_val[] = {1.1, 2.1}; + float out_val[] = {2.1, 4.1, 4.1, 6.1, 6.1, 8.1, 8.1, 10.1, 10.1, + 12.1, 12.1, 14.1, 14.1, 16.1, 16.1, 18.1, 18.1, 20.1}; + + // make FeatureBiasAdd(FeatureEncode, BiasEncode) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto feature_encode = g->nodes()->create<loco::FeatureEncode>(); + { + // setting values is ignored for testing + } + + auto bias = g->nodes()->create<loco::BiasEncode>(); + { + // nothing to do + } + + auto feature_bias_add = g->nodes()->create<loco::BiasAdd<loco::Domain::Feature>>(); + { + feature_bias_add->value(feature_encode); + feature_bias_add->bias(bias); + } + + // Make and assign data to pull node + auto inp_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_buf.shape()}; e.valid(); e.advance()) + { + inp_buf.at(e.current()) = in_val[n++]; + } + } + + auto bias_buf = make_buffer<float, LexicalLayout>(Shape{2}); + { + int n = 0; + for (IndexEnumerator e{bias_buf.shape()}; e.valid(); e.advance()) + { + bias_buf.at(e.current()) = bias_val[n++]; + } + } + + auto inp_data = locomotiv::make_data(inp_buf); + locomotiv::annot_data(feature_encode, std::move(inp_data)); + locomotiv::annot_domain(feature_encode, loco::Domain::Feature); + + auto bias_data = locomotiv::make_data(bias_buf); + locomotiv::annot_data(bias, std::move(bias_data)); + locomotiv::annot_domain(bias, loco::Domain::Bias); + + locomotiv::NodeExecution::get().run(feature_bias_add); + + auto bias_add_data = locomotiv::annot_data(feature_bias_add); + + // comparing the result + ASSERT_NE(bias_add_data, nullptr); + ASSERT_EQ(bias_add_data->dtype(), loco::DataType::FLOAT32); + 
ASSERT_EQ(*(bias_add_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(bias_add_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(bias_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(feature_bias_add), loco::Domain::Feature); +} diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp new file mode 100644 index 000000000..c2f2b44c0 --- /dev/null +++ b/compiler/locomotiv/src/Node/BiasEncode.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <stdexcept> +#include <cassert> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::BiasEncode *bias_enc) +{ + auto input_data = annot_data(bias_enc->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(bias_enc->input()) == loco::Domain::Tensor, + "Input domain should be Tensor"); + validate(input_data->shape()->rank() == 1, "Input data rank must be 1"); + + std::unique_ptr<NodeData> bias_enc_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_bufptr = input_data->as_s32_bufptr(); + bias_enc_data = make_data(*input_bufptr); + break; + } + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + bias_enc_data = make_data(*input_bufptr); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(bias_enc_data != nullptr); + annot_data(bias_enc, std::move(bias_enc_data)); + annot_domain(bias_enc, loco::Domain::Bias); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/BiasEncode.test.cpp b/compiler/locomotiv/src/Node/BiasEncode.test.cpp new file mode 100644 index 000000000..73e2af8a8 --- /dev/null +++ b/compiler/locomotiv/src/Node/BiasEncode.test.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Buffer; + +namespace +{ +template <typename T> loco::DataType loco_dtype() { throw std::runtime_error("Not supported yet"); } +template <> loco::DataType loco_dtype<int32_t>() { return loco::DataType::S32; } +template <> loco::DataType loco_dtype<float>() { return loco::DataType::FLOAT32; } + +template <typename T> const Buffer<T> *as_bufptr(const locomotiv::NodeData *data) +{ + throw std::runtime_error("Not supported yet"); +} +template <> const Buffer<int32_t> *as_bufptr<int32_t>(const locomotiv::NodeData *data) +{ + return data->as_s32_bufptr(); +} +template <> const Buffer<float> *as_bufptr<float>(const locomotiv::NodeData *data) +{ + return data->as_f32_bufptr(); +} + +template <typename T> void test() +{ + // Make pull-BiasEncode graph + auto g = loco::make_graph(); + + auto pull = g->nodes()->create<loco::Pull>(); + { + pull->dtype(loco_dtype<T>()); + pull->shape({1}); + } + + auto bias_enc = g->nodes()->create<loco::BiasEncode>(); + { + bias_enc->input(pull); + } + + // Make and assign data to pull node + auto pull_buf = make_buffer<T, LexicalLayout>(Shape{1}); + { + pull_buf.at(Index{0}) = static_cast<T>(100); + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + } + + locomotiv::NodeExecution::get().run(bias_enc); + + // check + auto bias_enc_data = 
locomotiv::annot_data(bias_enc); + + ASSERT_NE(bias_enc_data, nullptr); + ASSERT_EQ(bias_enc_data->dtype(), loco_dtype<T>()); + ASSERT_EQ(*(bias_enc_data->shape()), Shape{1}); + ASSERT_EQ(as_bufptr<T>(bias_enc_data)->at(Index{0}), pull_buf.at(Index{0})); + + ASSERT_EQ(locomotiv::annot_domain(bias_enc), loco::Domain::Bias); +} +} // namespace + +TEST(NodeExecution_BiasEncode, s32) { test<int32_t>(); } + +TEST(NodeExecution_BiasEncode, f32) { test<float>(); } diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp new file mode 100644 index 000000000..0360b9fef --- /dev/null +++ b/compiler/locomotiv/src/Node/ConstGen.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <stdexcept> +#include <cassert> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +namespace +{ + +/** + * @brief Get offset based on given shape and index. Assume lexical layout. 
+ * + * examples) + * For shape = {3, 4} and index = {1, 2}, + * offset would be 6 ( = 1 * (4) + 2 ) + * For shape = {2, 3, 4} and index = {1, 0, 2}, + * offset would be 14 ( = 1 * (3*4) + 0 *(4) + 2 ) + */ +inline uint32_t offset_by_index(const Shape &shape, const Index &index) +{ + static const nncc::core::ADT::tensor::LexicalLayout l; + return l.offset(shape, index); +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::ConstGen *constgen) +{ + uint32_t volume = 1; + + Shape shape; + shape.resize(constgen->rank()); + for (uint32_t i = 0; i < shape.rank(); ++i) + { + shape.dim(i) = constgen->dim(i).value(); + volume *= shape.dim(i); + } + + std::unique_ptr<NodeData> data = nullptr; + + switch (constgen->dtype()) + { + case loco::DataType::S32: + { + assert(volume == constgen->size<loco::DataType::S32>()); + + auto buf = make_buffer<int32_t, LexicalLayout>(shape); + + for (IndexEnumerator e{shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + uint32_t offset = ::offset_by_index(shape, index); + buf.at(index) = constgen->at<loco::DataType::S32>(offset); + } + + data = locomotiv::make_data(buf); + break; + } + case loco::DataType::FLOAT32: + { + assert(volume == constgen->size<loco::DataType::FLOAT32>()); + + auto buf = make_buffer<float, LexicalLayout>(shape); + + for (IndexEnumerator e{shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + uint32_t offset = ::offset_by_index(shape, index); + buf.at(index) = constgen->at<loco::DataType::FLOAT32>(offset); + } + + data = locomotiv::make_data(buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(data != nullptr); + annot_data(constgen, std::move(data)); + annot_domain(constgen, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/ConstGen.test.cpp b/compiler/locomotiv/src/Node/ConstGen.test.cpp new file mode 100644 index 000000000..838f4c11d --- /dev/null +++ 
b/compiler/locomotiv/src/Node/ConstGen.test.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_ConstGen, s32) +{ + // Make ConstGen node + loco::ConstGen constgen; + + constgen.dtype(loco::DataType::S32); + constgen.shape({2, 3}); + constgen.size<loco::DataType::S32>(6); + + constgen.at<loco::DataType::S32>(0) = 0; // Set 0,0 + constgen.at<loco::DataType::S32>(1) = 1; // Set 0,1 + constgen.at<loco::DataType::S32>(2) = 2; // Set 0,2 + constgen.at<loco::DataType::S32>(3) = -3; // Set 1,0 + constgen.at<loco::DataType::S32>(4) = -4; // Set 1,1 + constgen.at<loco::DataType::S32>(5) = -5; // Set 1,2 + + // run execution + locomotiv::NodeExecution::get().run(&constgen); + + // test + auto data = locomotiv::annot_data(&constgen); + ASSERT_NE(data, nullptr); + ASSERT_EQ(data->dtype(), loco::DataType::S32); + ASSERT_EQ(*data->shape(), Shape({2, 3})); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 0}), 
0); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 1}), 1); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{0, 2}), 2); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 0}), -3); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 1}), -4); + ASSERT_EQ(data->as_s32_bufptr()->at(Index{1, 2}), -5); + + ASSERT_EQ(locomotiv::annot_domain(&constgen), loco::Domain::Tensor); +} + +TEST(NodeExecution_ConstGen, f32) +{ + // Make ConstGen node + loco::ConstGen constgen; + + constgen.dtype(loco::DataType::FLOAT32); + constgen.shape({2, 3}); + constgen.size<loco::DataType::FLOAT32>(6); + + constgen.at<loco::DataType::FLOAT32>(0) = 0.0f; // Set 0,0 + constgen.at<loco::DataType::FLOAT32>(1) = 1.0f; // Set 0,1 + constgen.at<loco::DataType::FLOAT32>(2) = 2.0f; // Set 0,2 + constgen.at<loco::DataType::FLOAT32>(3) = 3.0f; // Set 1,0 + constgen.at<loco::DataType::FLOAT32>(4) = 4.0f; // Set 1,1 + constgen.at<loco::DataType::FLOAT32>(5) = 5.0f; // Set 1,2 + + // run execution + locomotiv::NodeExecution::get().run(&constgen); + + // test + auto data = locomotiv::annot_data(&constgen); + ASSERT_NE(data, nullptr); + ASSERT_EQ(data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*data->shape(), Shape({2, 3})); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 0}), 0.0f); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 1}), 1.0f); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{0, 2}), 2.0f); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 0}), 3.0f); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 1}), 4.0f); + ASSERT_FLOAT_EQ(data->as_f32_bufptr()->at(Index{1, 2}), 5.0f); + + ASSERT_EQ(locomotiv::annot_domain(&constgen), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp new file mode 100644 index 000000000..2e4185574 --- /dev/null +++ b/compiler/locomotiv/src/Node/Conv2D.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> +#include <stdexcept> + +namespace +{ +// image size includes padding. 
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t filter_size, uint32_t stride) +{ + assert((image_size + stride - filter_size) % stride == 0); + return (image_size + stride - filter_size) / stride; +} + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Calculates Conv2D + * @note Both input_buf and filter_buf have NHWC format + */ +template <typename RET_T, typename IFM_T, typename FIL_T> +Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input_buf, + const Buffer<FIL_T> *filter_buf) +{ + auto input_shape = input_buf->shape(); + auto filter_shape = filter_buf->shape(); + + locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4"); + locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4"); + locomotiv::validate(input_shape.dim(3) == filter_shape.dim(3), + "channel value mismatch"); // should have same channel values + + const uint32_t input_height = input_shape.dim(1); + const uint32_t input_width = input_shape.dim(2); + + const uint32_t filter_height = filter_shape.dim(1); + const uint32_t filter_width = filter_shape.dim(2); + + const uint32_t stride_width = conv2d->stride()->horizontal(); + const uint32_t stride_height = conv2d->stride()->vertical(); + + // TODO Enable dilations. Let's set these to 1 for now. 
+ const uint32_t dilation_width_factor = 1; + const uint32_t dilation_height_factor = 1; + + const uint32_t pad_top = conv2d->pad()->top(); + const uint32_t pad_bottom = conv2d->pad()->bottom(); + + const uint32_t pad_left = conv2d->pad()->left(); + const uint32_t pad_right = conv2d->pad()->right(); + + const uint32_t output_height = + compute_out_size(input_height + pad_top + pad_bottom, filter_height, stride_height); + const uint32_t output_width = + compute_out_size(input_width + pad_left + pad_right, filter_width, stride_width); + + const uint32_t batches = input_shape.dim(0); + const uint32_t input_depth = input_shape.dim(3); + const uint32_t output_depth = filter_shape.dim(0); + + Shape output_shape{batches, output_height, output_width, output_depth}; + auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape); + + for (uint32_t batch = 0; batch < batches; ++batch) + { + for (uint32_t out_y = 0; out_y < output_height; ++out_y) + { + for (uint32_t out_x = 0; out_x < output_width; ++out_x) + { + for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel) + { + const int in_x_origin = (out_x * stride_width) - pad_left; + const int in_y_origin = (out_y * stride_height) - pad_top; + + RET_T total = static_cast<RET_T>(0); + + for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel) + { + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + + // If the location is outside the bounds of the input image, + // use zero as a default value. 
+ if ((in_x >= 0) && ((unsigned)in_x < input_width) && (in_y >= 0) && + ((unsigned)in_y < input_height)) + { + auto input_value = + input_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, in_channel})); + auto filter_value = + filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel})); + total += (input_value * filter_value); + } + } + } + } + output_buf.at(Index({batch, out_y, out_x, out_channel})) = total; + } + } + } + } + return output_buf; +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Conv2D *conv2d) +{ + auto ifm_data = annot_data(conv2d->ifm()); + auto ker_data = annot_data(conv2d->ker()); + + validate(ifm_data, "Can't find input data of Conv2D"); + validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4"); + + validate(ker_data, "Can't find kernel data of Conv2D"); + validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4"); + + validate(annot_domain(conv2d->ifm()) == loco::Domain::Feature, "IFM of Conv2D is not feature"); + validate(annot_domain(conv2d->ker()) == loco::Domain::Filter, "Kernel of Conv2D is not filter"); + + std::unique_ptr<NodeData> conv2d_result = nullptr; + + if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32) + { + auto ifm_buf = ifm_data->as_f32_bufptr(); + auto ker_buf = ker_data->as_f32_bufptr(); + + auto conv2d_buf = calc_conv2D<float, float, float>(conv2d, ifm_buf, ker_buf); + + conv2d_result = make_data(conv2d_buf); + } + else + throw std::runtime_error("NYI for these DataTypes"); + + assert(conv2d_result != nullptr); + + annot_data(conv2d, std::move(conv2d_result)); + annot_domain(conv2d, loco::Domain::Feature); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Conv2D.test.cpp b/compiler/locomotiv/src/Node/Conv2D.test.cpp new file mode 100644 index 000000000..83d7fc268 --- /dev/null +++ b/compiler/locomotiv/src/Node/Conv2D.test.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., 
Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape, + const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v, + const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0, + const uint32_t pad_left = 0, const uint32_t pad_right = 0) +{ + auto g = loco::make_graph(); + + // Fill output data of FeatureEncode from ifm + auto ifm_enc = g->nodes()->create<loco::FeatureEncode>(); + { + auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape); + auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ifm_enc_buf.at(ind) = ifm_overlay.at(ind); + } + + auto enc_data = 
locomotiv::make_data(ifm_enc_buf); + locomotiv::annot_data(ifm_enc, std::move(enc_data)); + locomotiv::annot_domain(ifm_enc, loco::Domain::Feature); + } + + // Fill output data of FilterEncode from ker + auto ker_enc = g->nodes()->create<loco::FilterEncode>(); + { + auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape); + auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ker_enc_buf.at(ind) = ker_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(ker_enc_buf); + locomotiv::annot_data(ker_enc, std::move(enc_data)); + locomotiv::annot_domain(ker_enc, loco::Domain::Filter); + } + + // build Conv2D + auto conv2d = g->nodes()->create<loco::Conv2D>(); + conv2d->ifm(ifm_enc); + conv2d->ker(ker_enc); + conv2d->stride()->vertical(stride_v); + conv2d->stride()->horizontal(stride_h); + conv2d->pad()->top(pad_top); + conv2d->pad()->bottom(pad_bottom); + conv2d->pad()->left(pad_left); + conv2d->pad()->right(pad_right); + + // run interpreter + locomotiv::NodeExecution::get().run(conv2d); + + // get result of calculation + auto conv2d_result = locomotiv::annot_data(conv2d); + + // check the result + ASSERT_NE(conv2d_result, nullptr); + ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32); + ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape); + + auto ofm_overlay = + make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind)); + } + + ASSERT_EQ(locomotiv::annot_domain(conv2d), loco::Domain::Feature); +} + +} // namespace + +// clang-format off +/* ifm and ofm are from the code below: + +ifm = tf.random_normal([1, 5, 5, 1], stddev=1) +ker = tf.random_normal([3, 3, 
1, 1], stddev=1) +out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID') + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_Conv2D, f32_1x5x5x1_calculation) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + -0.48850584, 1.4292705, -1.3424522, -0.7441476, -1.8964586, + 1.7021934, -0.39246717, 0.6248314, 0.12724274, 1.3915083, + 0.382255, 0.7725081, 0.9171561, -1.1847119, 0.61858755, + 1.1530193, -0.476239, -0.9038663, -0.48764458, 0.339963, + 2.2817912, -0.8464133, -1.0598192, 0.8361126, 1.2344601 + }; + + const float ker[] = + { + -0.0830195, 0.21088193, -0.11781317, + 0.07755677, 1.6337638, 1.0792778, + -1.6922939, -1.5437212, 0.96667504 + }; + + const float ofm[] = + { + -0.28752697, 2.8108592, + -5.220376 , 0.7973861 + }; + + run_test(ifm, ker, ofm, + Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // shapes of input, ker, output + 2, 2 // stride + ); +} + +TEST(NodeExecution_Conv2D, f32_multiple_channel) +{ + // testing channel != 1, stride = [1,1] + using nncc::core::ADT::tensor::Shape; + + float ifm[1*5*5*3]; + for (int n = 0; n < 5*5*3; n++) ifm[n] = 2.2; + + float ker[2*2*2*3]; // nhwc + for (int n = 0; n < 2*2*2*3; n++) ker[n] = 1.1; + + float ofm[1*4*4*2]; + for (int n = 0; n < 1*4*4*2; n++) ofm[n] = 29.04; + + run_test(ifm, ker, ofm, + Shape{1, 5, 5, 3}, Shape{2, 2, 2, 3}, Shape{1, 4, 4, 2}, // shapes of input, ker, output + 1, 1 // stride + ); +} + +/* ifm and ofm are from the code below: +tensorflow version : 1.12.0 + +import tensorflow as tf + +ifm = tf.constant([-1.3653529, 0.4160791, 0.5059157, 0.7649683, 0.39364856, + -1.0164733, 1.506766, -1.1413091, 1.2766701, -0.9253511, + 1.3570246, 0.32089928, -0.9898171, 1.983792, -0.3423274, + -1.1901658, 1.2288222, -0.47401968, -0.01369802, 0.4136331, + 0.06960588, -0.16537654, -0.65015996, -0.555224, 0.7140603 +], shape=[1, 5, 5, 1]) + +ker = tf.constant([2.3490515, -0.4572366, 0.05790535, + 0.3672005, 0.52679914, 0.74607974, + 
-1.7211207, 1.1174419, -0.59663385 +], shape=[3, 3, 1, 1]) + +ofm = tf.nn.conv2d(ifm, ker, strides=[1, 1, 1, 1], padding='SAME') + +with tf.Session() as sess: + print(sess.run(ofm)) +*/ +TEST(NodeExecution_Conv2D, with_padding) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + -1.3653529, 0.4160791, 0.5059157, 0.7649683, 0.39364856, + -1.0164733, 1.506766, -1.1413091, 1.2766701, -0.9253511, + 1.3570246, 0.32089928, -0.9898171, 1.983792, -0.3423274, + -1.1901658, 1.2288222, -0.47401968, -0.01369802, 0.4136331, + 0.06960588, -0.16537654, -0.65015996, -0.555224, 0.7140603 + }; + + const float ker[] = + { + 2.3490515, -0.4572366, 0.05790535, + 0.3672005, 0.52679914, 0.74607974, + -1.7211207, 1.1174419, -0.59663385 + }; + + const float ofm[] = + { + -2.443676, 4.2094254, -3.6403496, 4.8254814, -2.743059, + 2.5620093, -5.185688, -1.1470609, 4.54913, -2.1985974, + -0.5567835, 0.49045527, 2.5752437, -2.3383713, 4.455967, + -0.13562866, 2.9236434, 1.4019353, -3.0521483, 6.782954, + 0.5286269, -3.9317036, 2.285041, -1.0817666, -0.04901773 + }; + + run_test(ifm, ker, ofm, + Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 5, 5, 1}, // shapes of input, ker, output + 1, 1, // stride + 1, 1, 1, 1 // padding + ); +} +// clang-format on diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp new file mode 100644 index 000000000..92d5aa161 --- /dev/null +++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> +#include <stdexcept> + +namespace +{ + +/** + * @brief Compute 1D output size based on given 1D arguments. + * + * @param whole_pad Sum of front and back pad + */ +inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size, + uint32_t stride) +{ + assert((image_size + whole_pad - filter_size) % stride == 0); + return (image_size + whole_pad - filter_size) / stride + 1; +} + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Calculates DepthwiseConv2D + * @note ifm_buf has NHWC and ker_buf HWCM format + * (Please check locomotiv README for further information) + */ +template <typename RET_T, typename IFM_T, typename KER_T> +Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffer<IFM_T> *ifm_buf, + const Buffer<KER_T> *ker_buf) +{ + auto ifm_shape = ifm_buf->shape(); + auto ker_shape = ker_buf->shape(); + + locomotiv::validate(ifm_shape.rank() == 4, "ifm rank must be 4"); + locomotiv::validate(ker_shape.rank() 
== 4, "depthwise filter rank must be 4"); + locomotiv::validate(ifm_shape.dim(3 /* of NHWC */) == ker_shape.dim(2 /* of HWCM */), + "channel value mismatch"); // should have same channel values + + const uint32_t ifm_height = ifm_shape.dim(1); + const uint32_t ifm_width = ifm_shape.dim(2); + + const uint32_t ker_height = ker_shape.dim(0); + const uint32_t ker_width = ker_shape.dim(1); + + const uint32_t stride_width = dw_conv2d->stride()->horizontal(); + const uint32_t stride_height = dw_conv2d->stride()->vertical(); + + // TODO Enable dilations. Let's set these to 1 for now. + const uint32_t dilation_width_factor = 1; + const uint32_t dilation_height_factor = 1; + + const uint32_t pad_top = dw_conv2d->pad()->top(); + const uint32_t pad_bottom = dw_conv2d->pad()->bottom(); + + const uint32_t pad_left = dw_conv2d->pad()->left(); + const uint32_t pad_right = dw_conv2d->pad()->right(); + + const uint32_t ofm_height = + compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height); + const uint32_t ofm_width = + compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width); + + const uint32_t batches = ifm_shape.dim(0); + const uint32_t ifm_depth = ifm_shape.dim(3); + const uint32_t multiplier = ker_shape.dim(3); + const uint32_t ofm_depth = ifm_depth * multiplier; + + Shape ofm_shape{batches, ofm_height, ofm_width, ofm_depth}; + auto ofm_buf = make_buffer<RET_T, LexicalLayout>(ofm_shape); + + for (uint32_t batch = 0; batch < batches; ++batch) + { + for (uint32_t ofm_y = 0; ofm_y < ofm_height; ++ofm_y) + { + for (uint32_t ofm_x = 0; ofm_x < ofm_width; ++ofm_x) + { + for (uint32_t ch = 0; ch < ifm_depth; ++ch) + { + for (uint32_t nth = 0; nth < multiplier; nth++) + { + const int in_x_origin = (ofm_x * stride_width) - pad_left; + const int in_y_origin = (ofm_y * stride_height) - pad_top; + float total = 0.f; + for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y) + { + for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x) + { + const int in_x 
= in_x_origin + dilation_width_factor * ker_x; + const int in_y = in_y_origin + dilation_height_factor * ker_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && ((unsigned)in_x < ifm_width) && (in_y >= 0) && + ((unsigned)in_y < ifm_height)) + { + auto ifm_value = ifm_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, ch})); + auto ker_value = ker_buf->at(Index({ker_y, ker_x, ch, nth})); + total += (ifm_value * ker_value); + } + } + } + uint32_t ofm_channel = ch * multiplier + nth; + ofm_buf.at(Index({batch, ofm_y, ofm_x, ofm_channel})) = total; + } + } + } + } + } + return ofm_buf; +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) +{ + auto ifm_data = annot_data(dw_conv2d->ifm()); + auto ker_data = annot_data(dw_conv2d->ker()); + + validate(ifm_data, "Can't find input data of DepthwiseConv2D"); + validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4"); + + validate(ker_data, "Can't find kernel data of DepthwiseConv2D"); + validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4"); + + validate(annot_domain(dw_conv2d->ifm()) == loco::Domain::Feature, + "IFM of DepthwiseConv2D is not feature"); + validate(annot_domain(dw_conv2d->ker()) == loco::Domain::DepthwiseFilter, + "Kernel of DepthwiseConv2D is not depthwise filter"); + + std::unique_ptr<NodeData> dw_conv2d_result = nullptr; + + if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32) + { + auto ifm_buf = ifm_data->as_f32_bufptr(); + auto ker_buf = ker_data->as_f32_bufptr(); + + auto dw_conv2d_buf = calc_dw_conv2d<float, float, float>(dw_conv2d, ifm_buf, ker_buf); + + dw_conv2d_result = make_data(dw_conv2d_buf); + } + else + throw std::runtime_error("NYI for these DataTypes"); + + assert(dw_conv2d_result != nullptr); + + annot_data(dw_conv2d, std::move(dw_conv2d_result)); + annot_domain(dw_conv2d, loco::Domain::Feature); +} + 
+} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp new file mode 100644 index 000000000..48824c2e0 --- /dev/null +++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.test.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape, + const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v, + const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0, + const uint32_t pad_left = 0, const uint32_t pad_right = 0) +{ + auto g = loco::make_graph(); + + // Fill output data of FeatureEncode from ifm + auto ifm_enc = g->nodes()->create<loco::FeatureEncode>(); + { + auto ifm_enc_buf = 
make_buffer<float, LexicalLayout>(ifm_shape); + auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ifm_enc_buf.at(ind) = ifm_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(ifm_enc_buf); + locomotiv::annot_data(ifm_enc, std::move(enc_data)); + locomotiv::annot_domain(ifm_enc, loco::Domain::Feature); + } + + // Fill output data of DepthwiseFilterEncode from ker + auto ker_enc = g->nodes()->create<loco::DepthwiseFilterEncode>(); + { + auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape); + auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ker_enc_buf.at(ind) = ker_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(ker_enc_buf); + locomotiv::annot_data(ker_enc, std::move(enc_data)); + locomotiv::annot_domain(ker_enc, loco::Domain::DepthwiseFilter); + } + + // build DepthwiseConv2D + auto dw_conv2d = g->nodes()->create<loco::DepthwiseConv2D>(); + dw_conv2d->ifm(ifm_enc); + dw_conv2d->ker(ker_enc); + dw_conv2d->stride()->vertical(stride_v); + dw_conv2d->stride()->horizontal(stride_h); + dw_conv2d->pad()->top(pad_top); + dw_conv2d->pad()->bottom(pad_bottom); + dw_conv2d->pad()->left(pad_left); + dw_conv2d->pad()->right(pad_right); + + // run interpreter + locomotiv::NodeExecution::get().run(dw_conv2d); + + // get result of calculation + auto dw_conv2d_result = locomotiv::annot_data(dw_conv2d); + + // check the result + ASSERT_NE(dw_conv2d_result, nullptr); + ASSERT_TRUE(dw_conv2d_result->dtype() == loco::DataType::FLOAT32); + ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape); + + auto ofm_overlay = + make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm)); + for 
(nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ASSERT_FLOAT_EQ(dw_conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind)); + } + + ASSERT_EQ(locomotiv::annot_domain(dw_conv2d), loco::Domain::Feature); +} + +} // namespace + +// clang-format off + +/* ifm, ker and ofm are from the code below: + +ifm = tf.random_normal([1, 5, 5, 2], stddev=1.1) +ker = tf.random_normal([4, 4, 2, 3], stddev=1.1) +out = tf.nn.depthwise_conv2d(ifm, ker, strides = [1, 1, 1, 1], padding= 'VALID') + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_DepthwiseConv2D, f32_random_valid) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = {0.8122538, 1.209147, 0.6903842, -0.26646265, 1.516799, -1.8540707, + -0.74240327, 1.7811562, -0.03699546, -0.44468504, -1.4982721, -1.1858582, + -0.21140318, -0.974522, 1.0000849, -1.294535, -0.6108882, 0.25827602, + 1.3631831, -0.5180266, 0.20870179, 0.18333802, -0.42263857, -1.6694735, + 0.0415236, -0.3903758, 2.0933757, -0.29660916, 2.1218338, -1.1599928, + 0.57163256, 0.48865932, -1.3622656, 0.35924262, 1.2951899, -0.1769997, + 0.74513537, -0.31920406, -1.2902768, -0.7095059, 1.9157801, -0.41028237, + 1.2502829, 0.3354887, 1.4199319, -0.20366786, -0.8828556, 0.5173567, + 1.7708117, -0.30096334}; + const float ker[] = { + -0.19805557, 0.58464956, -0.7804337, 0.06974592, 0.45790604, 0.24833807, 0.43393376, + 0.2541043, -0.04406675, -0.32167575, 1.0546446, -1.4978354, 0.20829494, 1.1659569, + 0.37908667, -0.94137955, 0.293349, -1.1023049, 0.76133233, 0.55595005, 1.4458209, + 1.6128604, 1.5655615, -2.183877, -0.90535915, -0.49858555, 1.7168728, -1.1590382, + 0.6706056, 1.2215618, -0.06603386, 0.16559464, 0.541991, -0.44488335, 0.766181, + 1.0227629, -0.6352362, -1.670828, -0.63334507, 0.0313305, -0.6721083, 0.50112915, + -0.15218066, 0.67222077, -0.3613627, -0.08516614, -0.5024078, -0.9503976, -2.1892295, + 1.8308185, -0.15187284, 1.5761136, 
0.24869336, -1.7378871, -0.22518761, 1.0175673, + 0.7084485, -0.74157554, -1.8185995, -1.3330095, -0.04427439, 1.0556892, -0.68243974, + 0.32001218, 2.0901792, -1.1612813, 0.7294674, 0.05740008, -0.00832882, 1.0446658, + 0.4477195, -0.09174404, -1.0176039, 1.5066665, -2.148343, 0.29421416, 0.93011874, + -0.15737922, -1.6444012, 0.25780794, -0.6545867, -0.3488956, 0.26167992, -0.154414, + 0.2798124, -0.8590068, 2.0494444, 0.48268002, 0.81941164, -0.4848027, 0.76870304, + 0.7102261, 0.45778143, 0.23214905, -0.17742023, -0.75016516}; + const float ofm[] = {4.474646, 0.6792067, -1.9799856, 7.484751, 4.3087378, -1.905938, + 1.4887369, 0.4361322, 0.79539883, -3.8583446, -4.502204, 4.356392, + -5.3030324, 3.493003, -4.349277, 2.3069482, -3.8881323, -0.73901534, + -0.6629516, 2.1247253, -4.9229584, 1.6716996, -3.0208125, 1.0597891}; + + run_test(ifm, ker, ofm, + Shape{1, 5, 5, 2}, Shape{4, 4, 2, 3}, Shape{1, 2, 2, 6}, // shapes of input, ker, output + 1, 1 // stride + ); +} + +// TODO Add same padding test + +// clang-format on diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp new file mode 100644 index 000000000..17004901f --- /dev/null +++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; + +/** + * @brief Encode input tensor into depthwise filter represented in "HWCM" layout + * + * (Please check locomotiv README for further information) + */ +template <typename T> +std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilterEncode *node, + const Buffer<T> *input_buf) +{ + auto encoder = node->encoder(); + + // Make TensorShape from input + loco::TensorShape input_shape; + input_shape.rank(input_buf->shape().rank()); + assert(input_shape.rank() == 4); + for (uint32_t i = 0; i < input_shape.rank(); ++i) + { + input_shape.dim(i) = input_buf->shape().dim(i); + } + + loco::DepthwiseFilterShape node_shape = encoder->shape(input_shape); + + // Make HWCM (i.e. 
height, width, depth, multiplier) buffer from DepthwiseFilterShape + Buffer<T> node_buf = make_buffer<T, LexicalLayout>( + Shape{node_shape.height().value(), node_shape.width().value(), node_shape.depth().value(), + node_shape.multiplier().value()}); + + // Copy buffer in an order arranged by encoder + for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance()) + { + loco::DepthwiseFilterIndex index; + index.row() = e.current().at(0); + index.column() = e.current().at(1); + index.channel() = e.current().at(2); + index.nth() = e.current().at(3); + + node_buf.at(e.current()) = input_buf->at(encoder->value(index)); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) +{ + auto input_data = annot_data(enc->input()); + + validate(input_data, "Input of DepthwiseFilterEncode not ready"); + validate(annot_domain(enc->input()) == loco::Domain::Tensor, + "Input of DepthwiseFilterEncode is not Tensor"); + validate(input_data->shape()->rank() == 4, "Input shape mismatch"); + + std::unique_ptr<NodeData> enc_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + enc_data = dw_filter_encode<float>(enc, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(enc_data != nullptr); + annot_data(enc, std::move(enc_data)); + annot_domain(enc, loco::Domain::DepthwiseFilter); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp new file mode 100644 index 000000000..db828c08b --- /dev/null +++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <loco/IR/PermutingCodec.h> + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +TEST(NodeExecution_DepthwiseFilterEncode, f32) +{ + const uint32_t H = 2; + const uint32_t W = 3; + const uint32_t C = 4; + const uint32_t M = 5; + + auto g = loco::make_graph(); + + // Pull + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + + // Make and assign "MHWC" data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{M, H, W, C}); + float f = 1; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = f; + f += 0.1f; // Doesn't matter what it is + } + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + // Encoder to correctly read input tensor as MHWC + auto encoder = 
std::unique_ptr<loco::PermutingEncoder<loco::Domain::DepthwiseFilter>>( + new loco::PermutingEncoder<loco::Domain::DepthwiseFilter>); + encoder->perm()->axis(loco::DepthwiseFilterAxis::Multiplier) = 0; + encoder->perm()->axis(loco::DepthwiseFilterAxis::Height) = 1; + encoder->perm()->axis(loco::DepthwiseFilterAxis::Width) = 2; + encoder->perm()->axis(loco::DepthwiseFilterAxis::Depth) = 3; + + // DepthwiseFilterEncode + auto enc = g->nodes()->create<loco::DepthwiseFilterEncode>(); + enc->input(pull); + enc->encoder(std::move(encoder)); + + locomotiv::NodeExecution::get().run(enc); + + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(enc_data->shape()), (Shape{H, W, C, M})); // locomotiv depthwise filter is HWCM + auto enc_buf = enc_data->as_f32_bufptr(); + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + for (uint32_t m = 0; m < M; ++m) + ASSERT_FLOAT_EQ(pull_buf.at(Index{m, h, w, c}), enc_buf->at(Index{h, w, c, m})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::DepthwiseFilter); +} diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.cpp new file mode 100644 index 000000000..e5e2d67c7 --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseAdd.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseAdd *eltwise_add) +{ + struct Func final : public BinaryFunc + { + float apply(float lhs, float rhs) const { return lhs + rhs; } + }; + + Func f; + + eltwise_binary(eltwise_add, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp new file mode 100644 index 000000000..2899dccdd --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseAdd.test.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + +x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +y = tf.constant([-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18], + shape=[1, 3, 3, 2], dtype=tf.float32) +out = tf.math.add(x, y) + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_EltwiseAdd, f32) +{ + float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y_val[] = {-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18}; + float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + // make EltwiseAdd(Pull, Pull) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp_lhs = g->nodes()->create<loco::Pull>(); + { + inp_lhs->dtype(loco::DataType::FLOAT32); + inp_lhs->shape({1, 3, 3, 2}); + } + + auto inp_rhs = g->nodes()->create<loco::Pull>(); + { + inp_rhs->dtype(loco::DataType::FLOAT32); + inp_rhs->shape({1, 3, 3, 2}); + } + + auto eltwise_add = g->nodes()->create<loco::EltwiseAdd>(); + { + eltwise_add->lhs(inp_lhs); + eltwise_add->rhs(inp_rhs); + } + + // Make and assign data to two pull nodes + auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance()) + { + inp_lhs_buf.at(e.current()) = 
x_val[n++]; + } + } + + auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance()) + { + inp_rhs_buf.at(e.current()) = y_val[n++]; + } + } + + auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf); + locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data)); + locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor); + + auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf); + locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data)); + locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor); + + // run the network + locomotiv::NodeExecution::get().run(eltwise_add); + + // get result + auto eltwise_add_data = locomotiv::annot_data(eltwise_add); + + // comparing the result + ASSERT_NE(eltwise_add_data, nullptr); + ASSERT_EQ(eltwise_add_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(eltwise_add_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(eltwise_add_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(eltwise_add_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(eltwise_add), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.cpp new file mode 100644 index 000000000..a054d9a97 --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseDiv.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseDiv *eltwise_div) +{ + struct Func final : public BinaryFunc + { + float apply(float lhs, float rhs) const { return lhs / rhs; } + }; + + Func f; + + eltwise_binary(eltwise_div, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp new file mode 100644 index 000000000..60950c15b --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseDiv.test.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + +x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +out = tf.div(x, y) + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_EltwiseDiv, f32) +{ + float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float out_val[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + // make EltwiseDiv(Pull, Pull) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp_lhs = g->nodes()->create<loco::Pull>(); + { + inp_lhs->dtype(loco::DataType::FLOAT32); + inp_lhs->shape({1, 3, 3, 2}); + } + + auto inp_rhs = g->nodes()->create<loco::Pull>(); + { + inp_rhs->dtype(loco::DataType::FLOAT32); + inp_rhs->shape({1, 3, 3, 2}); + } + + auto eltwise_div = g->nodes()->create<loco::EltwiseDiv>(); + { + eltwise_div->lhs(inp_lhs); + eltwise_div->rhs(inp_rhs); + } + + // Make and assign data to two pull nodes + auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance()) + { + inp_lhs_buf.at(e.current()) = x_val[n++]; + } + } + + auto inp_rhs_buf = 
make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance()) + { + inp_rhs_buf.at(e.current()) = y_val[n++]; + } + } + + auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf); + locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data)); + locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor); + + auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf); + locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data)); + locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor); + + // run the network + locomotiv::NodeExecution::get().run(eltwise_div); + + // get result + auto eltwise_div_data = locomotiv::annot_data(eltwise_div); + + // comparing the result + ASSERT_NE(eltwise_div_data, nullptr); + ASSERT_EQ(eltwise_div_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(eltwise_div_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(eltwise_div_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(eltwise_div_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(eltwise_div), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/EltwiseMax.cpp b/compiler/locomotiv/src/Node/EltwiseMax.cpp new file mode 100644 index 000000000..ec44fd6fa --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseMax.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include <cstdlib> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseMax *eltwise_max) +{ + struct Func final : public BinaryFunc + { + float apply(float lhs, float rhs) const { return std::max(lhs, rhs); } + }; + + Func f; + + eltwise_binary(eltwise_max, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseMax.test.cpp b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp new file mode 100644 index 000000000..c64db8994 --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseMax.test.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + +x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +y = tf.constant([18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], + shape=[1, 3, 3, 2], dtype=tf.float32) +out = tf.math.maximum(x, y) + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_EltwiseMax, f32) +{ + float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; + float out_val[] = {18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + + // make EltwiseMax(Pull, Pull) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp_lhs = g->nodes()->create<loco::Pull>(); + { + inp_lhs->dtype(loco::DataType::FLOAT32); + inp_lhs->shape({1, 3, 3, 2}); + } + + auto inp_rhs = g->nodes()->create<loco::Pull>(); + { + inp_rhs->dtype(loco::DataType::FLOAT32); + inp_rhs->shape({1, 3, 3, 2}); + } + + auto eltwise_max = g->nodes()->create<loco::EltwiseMax>(); + { + eltwise_max->lhs(inp_lhs); + eltwise_max->rhs(inp_rhs); + } + + // Make and assign data to two pull nodes + auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance()) + { + inp_lhs_buf.at(e.current()) = x_val[n++]; + } 
+ } + + auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance()) + { + inp_rhs_buf.at(e.current()) = y_val[n++]; + } + } + + auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf); + locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data)); + locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor); + + auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf); + locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data)); + locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor); + + // run the network + locomotiv::NodeExecution::get().run(eltwise_max); + + // get result + auto eltwise_max_data = locomotiv::annot_data(eltwise_max); + + // comparing the result + ASSERT_NE(eltwise_max_data, nullptr); + ASSERT_EQ(eltwise_max_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(eltwise_max_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(eltwise_max_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(eltwise_max_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(eltwise_max), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/EltwiseMul.cpp b/compiler/locomotiv/src/Node/EltwiseMul.cpp new file mode 100644 index 000000000..6720ab92f --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseMul.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseMul *eltwise_mul) +{ + struct Func final : public BinaryFunc + { + float apply(float lhs, float rhs) const { return lhs * rhs; } + }; + + Func f; + + eltwise_binary(eltwise_mul, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseMul.test.cpp b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp new file mode 100644 index 000000000..b76888300 --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseMul.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + +x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +y = tf.constant([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], shape=[1, 3, 3, 2], + dtype=tf.float32) +out = tf.math.multiply(x, y) + +with tf.Session() as sess: + print(sess.run(out)) +*/ +TEST(NodeExecution_EltwiseMul, f32) +{ + float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y_val[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + float out_val[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8}; + + // make EltwiseMul(Pull, Pull) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp_lhs = g->nodes()->create<loco::Pull>(); + { + inp_lhs->dtype(loco::DataType::FLOAT32); + inp_lhs->shape({1, 3, 3, 2}); + } + + auto inp_rhs = g->nodes()->create<loco::Pull>(); + { + inp_rhs->dtype(loco::DataType::FLOAT32); + inp_rhs->shape({1, 3, 3, 2}); + } + + auto eltwise_mul = g->nodes()->create<loco::EltwiseMul>(); + { + eltwise_mul->lhs(inp_lhs); + eltwise_mul->rhs(inp_rhs); + } + + // Make and assign data to two pull nodes + auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator 
e{inp_lhs_buf.shape()}; e.valid(); e.advance()) + { + inp_lhs_buf.at(e.current()) = x_val[n++]; + } + } + + auto inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance()) + { + inp_rhs_buf.at(e.current()) = y_val[n++]; + } + } + + auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf); + locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data)); + locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor); + + auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf); + locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data)); + locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor); + + // run the network + locomotiv::NodeExecution::get().run(eltwise_mul); + + // get result + auto eltwise_mul_data = locomotiv::annot_data(eltwise_mul); + + // comparing the result + ASSERT_NE(eltwise_mul_data, nullptr); + ASSERT_EQ(eltwise_mul_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(eltwise_mul_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(eltwise_mul_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(eltwise_mul_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(eltwise_mul), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp new file mode 100644 index 000000000..b4625a757 --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseSqrt.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include <cmath> + +namespace +{ + +inline float sqrt_ew(float val) { return sqrt(val); } + +struct Func final : public locomotiv::UnaryFunc +{ + float apply(float v) const final { return sqrt_ew(v); } +}; + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseSqrt *sqrt_node) +{ + Func f; + + eltwise_unary(sqrt_node, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp new file mode 100644 index 000000000..adb1b853e --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseSqrt.test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +#include <cmath> +#include <limits> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_EltwiseSqrt, f32) +{ + // Make Pull-EltwiseSqrt graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({4}); + auto sqrt = g->nodes()->create<loco::EltwiseSqrt>(); + sqrt->input(pull); + + // Make and assign data to Pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4}); + pull_buf.at(Index{0}) = 4.0f; + pull_buf.at(Index{1}) = 9.0f; + pull_buf.at(Index{2}) = 0.0f; + pull_buf.at(Index{3}) = -1.0f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(sqrt); + + auto sqrt_data = locomotiv::annot_data(sqrt); + ASSERT_NE(sqrt_data, nullptr); + ASSERT_EQ(sqrt_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(sqrt_data->shape()), Shape{4}); + ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{0}), 2.0f); + ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{1}), 3.0f); + ASSERT_FLOAT_EQ(sqrt_data->as_f32_bufptr()->at(Index{2}), 0.0f); + ASSERT_TRUE(std::isnan(sqrt_data->as_f32_bufptr()->at(Index{3}))); + + ASSERT_EQ(locomotiv::annot_domain(sqrt), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/EltwiseSub.cpp b/compiler/locomotiv/src/Node/EltwiseSub.cpp new file mode 100644 index 000000000..7943f950b --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseSub.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 
(c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +namespace locomotiv +{ + +void NodeExecution::execute(loco::EltwiseSub *eltwise_sub) +{ + struct Func final : public BinaryFunc + { + float apply(float lhs, float rhs) const { return lhs - rhs; } + }; + + Func f; + + eltwise_binary(eltwise_sub, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/EltwiseSub.test.cpp b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp new file mode 100644 index 000000000..7eff90f9e --- /dev/null +++ b/compiler/locomotiv/src/Node/EltwiseSub.test.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/* +test case generated from the following: + +x = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +y = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + shape=[1, 3, 3, 2], dtype=tf.float32) +out = tf.math.subtract(x, y) + +with tf.Session() as sess: + print(sess.run(out)) +*/ +/** @brief Runs EltwiseSub over two FLOAT32 Pull nodes that hold the same 1x3x3x2 values and expects a FLOAT32 result of the same shape, all zeros, annotated with the Tensor domain. */ TEST(NodeExecution_EltwiseSub, f32) +{ + float x_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float out_val[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + // make EltwiseSub(Pull, Pull) + auto g = loco::make_graph(); + Shape input_shape{1, 3, 3, 2}; // NHWC + + auto inp_lhs = g->nodes()->create<loco::Pull>(); + { + inp_lhs->dtype(loco::DataType::FLOAT32); + inp_lhs->shape({1, 3, 3, 2}); + } + + auto inp_rhs = g->nodes()->create<loco::Pull>(); + { + inp_rhs->dtype(loco::DataType::FLOAT32); + inp_rhs->shape({1, 3, 3, 2}); + } + + auto eltwise_sub = g->nodes()->create<loco::EltwiseSub>(); + { + eltwise_sub->lhs(inp_lhs); + eltwise_sub->rhs(inp_rhs); + } + + // Make and assign data to two pull nodes + auto inp_lhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_lhs_buf.shape()}; e.valid(); e.advance()) + { + inp_lhs_buf.at(e.current()) = x_val[n++]; + } + } + + auto 
inp_rhs_buf = make_buffer<float, LexicalLayout>(input_shape); + { + int n = 0; + for (IndexEnumerator e{inp_rhs_buf.shape()}; e.valid(); e.advance()) + { + inp_rhs_buf.at(e.current()) = y_val[n++]; + } + } + + auto inp_lhs_data = locomotiv::make_data(inp_lhs_buf); + locomotiv::annot_data(inp_lhs, std::move(inp_lhs_data)); + locomotiv::annot_domain(inp_lhs, loco::Domain::Tensor); + + auto inp_rhs_data = locomotiv::make_data(inp_rhs_buf); + locomotiv::annot_data(inp_rhs, std::move(inp_rhs_data)); + locomotiv::annot_domain(inp_rhs, loco::Domain::Tensor); + + // run the network + locomotiv::NodeExecution::get().run(eltwise_sub); + + // get result + auto eltwise_sub_data = locomotiv::annot_data(eltwise_sub); + + // comparing the result + ASSERT_NE(eltwise_sub_data, nullptr); + ASSERT_EQ(eltwise_sub_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(eltwise_sub_data->shape()), Shape({1, 3, 3, 2})); + + uint32_t n = 0; + for (IndexEnumerator e{*(eltwise_sub_data->shape())}; e.valid(); e.advance()) + { + ASSERT_FLOAT_EQ(eltwise_sub_data->as_f32_bufptr()->at(e.current()), out_val[n++]); + } + + ASSERT_EQ(locomotiv::annot_domain(eltwise_sub), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/FeatureCodec.test.cpp b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp new file mode 100644 index 000000000..c35f0e69a --- /dev/null +++ b/compiler/locomotiv/src/Node/FeatureCodec.test.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <loco/IR/PermutingCodec.h> + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::Buffer; + +// This file is intended to test FeatureEncode and FeatureDecode at once +namespace +{ + +/** @brief Fixture that owns a single loco::Graph and offers helpers creating Pull, FeatureEncode and FeatureDecode nodes inside it. */ class NodeExecution_FeatureCodec : public ::testing::Test +{ +private: + loco::Graph g; + +protected: + /// @brief Make Pull node and set data by given buffer and data type + template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype) + { + auto pull = g.nodes()->create<loco::Pull>(); + pull->dtype(dtype); + + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + return pull; + } + + /// @brief Make FeatureEncode node with given input and encoding permutation + loco::FeatureEncode *feature_encode_layer(loco::Node *input, + const loco::Permutation<loco::Domain::Feature> &perm) + { + auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Feature>>( + new loco::PermutingEncoder<loco::Domain::Feature>); + + encoder->perm(perm); + + auto enc = g.nodes()->create<loco::FeatureEncode>(); + enc->input(input); + enc->encoder(std::move(encoder)); + + return enc; + } + + /// @brief Make FeatureDecode node with given input and decoding permutation + loco::FeatureDecode *feature_decode_layer(loco::Node *input, + 
const loco::Permutation<loco::Domain::Feature> &perm) + { + auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Feature>>( + new loco::PermutingDecoder<loco::Domain::Feature>); + + decoder->perm(perm); + + auto dec = g.nodes()->create<loco::FeatureDecode>(); + dec->input(input); + dec->decoder(std::move(decoder)); + + return dec; + } +}; + +} // namespace + +/** @brief S32 round trip: a tensor laid out as NCHW is encoded into an NHWC feature and decoded back to NCHW; dtype, shape, every element and the annotated domain are checked after each step. */ TEST_F(NodeExecution_FeatureCodec, s32) +{ + const uint32_t N = 2; + const uint32_t H = 3; + const uint32_t W = 4; + const uint32_t C = 5; + + // Make "NCHW" data for pull node + auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W}); + int32_t i = 0; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = i; + ++i; // Doesn't matter what it is + } + + // Make NCHW permutation for encoder and decoder + loco::Permutation<loco::Domain::Feature> NCHW; + + NCHW.axis(loco::FeatureAxis::Count) = 0; + NCHW.axis(loco::FeatureAxis::Depth) = 1; + NCHW.axis(loco::FeatureAxis::Height) = 2; + NCHW.axis(loco::FeatureAxis::Width) = 3; + + // Pull + auto pull = pull_layer(pull_buf, loco::DataType::S32); + + // FeatureEncode + auto enc = feature_encode_layer(pull, NCHW); + locomotiv::NodeExecution::get().run(enc); + + // Test FeatureEncode + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC + auto enc_buf = enc_data->as_s32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), enc_buf->at(Index{n, h, w, c})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature); + + // FeatureDecode + auto dec = feature_decode_layer(enc, NCHW); + locomotiv::NodeExecution::get().run(dec); + + // Test FeatureDecode: Encode -> Decode == identity + auto 
dec_data = locomotiv::annot_data(dec); + ASSERT_NE(dec_data, nullptr); + ASSERT_EQ(dec_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(dec_data->shape()), (Shape{N, C, H, W})); + auto dec_buf = dec_data->as_s32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), dec_buf->at(Index{n, c, h, w})); + + ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor); +} + +/** @brief FLOAT32 round trip with a CHNW tensor layout: encode into an NHWC feature, decode back to CHNW; dtype, shape, every element and the annotated domain are checked after each step. */ TEST_F(NodeExecution_FeatureCodec, f32) +{ + const uint32_t N = 2; + const uint32_t H = 3; + const uint32_t W = 4; + const uint32_t C = 5; + + // Make crazy "CHNW" data for pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W}); + float f = 0.0f; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = f; + f += 0.1f; // Doesn't matter what it is + } + + // Make CHNW permutation for encoder and decoder + loco::Permutation<loco::Domain::Feature> CHNW; + + CHNW.axis(loco::FeatureAxis::Depth) = 0; + CHNW.axis(loco::FeatureAxis::Height) = 1; + CHNW.axis(loco::FeatureAxis::Count) = 2; + CHNW.axis(loco::FeatureAxis::Width) = 3; + + // Pull + auto pull = pull_layer(pull_buf, loco::DataType::FLOAT32); + + // FeatureEncode + auto enc = feature_encode_layer(pull, CHNW); + locomotiv::NodeExecution::get().run(enc); + + // Test FeatureEncode + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv feature is NHWC + auto enc_buf = enc_data->as_f32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), enc_buf->at(Index{n, h, w, c})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Feature); + + // 
FeatureDecode + auto dec = feature_decode_layer(enc, CHNW); + locomotiv::NodeExecution::get().run(dec); + + // Test FeatureDecode: Encode -> Decode == identity + auto dec_data = locomotiv::annot_data(dec); + ASSERT_NE(dec_data, nullptr); + ASSERT_EQ(dec_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(dec_data->shape()), (Shape{C, H, N, W})); + auto dec_buf = dec_data->as_f32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), dec_buf->at(Index{c, h, n, w})); + + ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp new file mode 100644 index 000000000..8a56a56b2 --- /dev/null +++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::Index; + +/** @brief Produce the tensor data for a FeatureDecode node. The input buffer is read as an NHWC feature, the decoder of the node gives the output tensor shape, and each output element is copied from the feature index the decoder maps it to. @tparam T element type of both buffers @param node FeatureDecode node providing decoder() @param input_buf rank-4 feature buffer; the rank is checked by assert only @return NodeData created from the filled tensor buffer */ template <typename T> +std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *node, + const Buffer<T> *input_buf) +{ + auto decoder = node->decoder(); + + // Make FeatureShape from input. Note that feature in locomotiv represented as NHWC + loco::FeatureShape input_shape; + assert(input_buf->shape().rank() == 4); + input_shape.count() = input_buf->shape().dim(0); + input_shape.height() = input_buf->shape().dim(1); + input_shape.width() = input_buf->shape().dim(2); + input_shape.depth() = input_buf->shape().dim(3); + + loco::TensorShape node_shape = decoder->shape(input_shape); + + // Make tensor buffer from TensorShape + Buffer<T> node_buf = + make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value(), + node_shape.dim(2).value(), node_shape.dim(3).value()}); + + // Copy buffer in an order arranged by decoder + for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance()) + { + loco::FeatureIndex feature_index = decoder->value(e.current()); + Index buf_index({feature_index.batch(), feature_index.row(), feature_index.column(), + feature_index.channel()}); + + node_buf.at(e.current()) = input_buf->at(buf_index); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +/** @brief Execute a FeatureDecode node: validates that the input has data, is annotated as Feature and has rank 4, decodes S32 or FLOAT32 input, then annotates this node with the decoded data and the Tensor domain. @param dec node to execute @throws std::runtime_error when the input data type is neither S32 nor FLOAT32. NOTE(review): failure behavior of validate() is defined in Validation.h, not visible here. */ void NodeExecution::execute(loco::FeatureDecode *dec) +{ + auto input_data = annot_data(dec->input()); + + validate(input_data, "Input of FeatureDecode not ready"); + 
validate(annot_domain(dec->input()) == loco::Domain::Feature, + "Input of FeatureDecode is not Feature"); + validate(input_data->shape()->rank() == 4, "Input shape mismatch"); + + std::unique_ptr<NodeData> dec_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_buf = input_data->as_s32_bufptr(); + dec_data = feature_decode<int32_t>(dec, input_buf); + break; + } + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + dec_data = feature_decode<float>(dec, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(dec_data != nullptr); + annot_data(dec, std::move(dec_data)); + annot_domain(dec, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/FeatureEncode.cpp b/compiler/locomotiv/src/Node/FeatureEncode.cpp new file mode 100644 index 000000000..406de76ff --- /dev/null +++ b/compiler/locomotiv/src/Node/FeatureEncode.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; + +/** @brief Produce the feature data for a FeatureEncode node. The encoder of the node gives the feature shape for the input tensor shape, an NHWC buffer of that shape is allocated, and each element is copied from the input tensor index the encoder maps its feature index to. @tparam T element type of both buffers @param node FeatureEncode node providing encoder() @param input_buf rank-4 tensor buffer; the rank is checked by assert only @return NodeData created from the filled NHWC buffer */ template <typename T> +std::unique_ptr<locomotiv::NodeData> feature_encode(const loco::FeatureEncode *node, + const Buffer<T> *input_buf) +{ + auto encoder = node->encoder(); + + // Make TensorShape from input + loco::TensorShape input_shape; + input_shape.rank(input_buf->shape().rank()); + assert(input_shape.rank() == 4); + for (uint32_t i = 0; i < input_shape.rank(); ++i) + { + input_shape.dim(i) = input_buf->shape().dim(i); + } + + loco::FeatureShape node_shape = encoder->shape(input_shape); + + // Make NHWC buffer from FeatureShape + Buffer<T> node_buf = + make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(), + node_shape.width().value(), node_shape.depth().value()}); + + // Copy buffer in an order arranged by encoder + for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance()) + { + loco::FeatureIndex index; + index.batch() = e.current().at(0); + index.row() = e.current().at(1); + index.column() = e.current().at(2); + index.channel() = e.current().at(3); + + node_buf.at(e.current()) = input_buf->at(encoder->value(index)); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +/** @brief Execute a FeatureEncode node: validates that the input has data, is annotated as Tensor and has rank 4, encodes S32 or FLOAT32 input, then annotates this node with the encoded data and the Feature domain. @param enc node to execute @throws std::runtime_error when the input data type is neither S32 nor FLOAT32. NOTE(review): failure behavior of validate() is defined in Validation.h, not visible here. */ void NodeExecution::execute(loco::FeatureEncode *enc) +{ + auto input_data = annot_data(enc->input()); + + validate(input_data, "Input of FeatureEncode not ready"); + validate(annot_domain(enc->input()) == loco::Domain::Tensor, + "Input of FeatureEncode is not Tensor"); + validate(input_data->shape()->rank() == 
4, "Input shape mismatch"); + + std::unique_ptr<NodeData> enc_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_buf = input_data->as_s32_bufptr(); + enc_data = feature_encode<int32_t>(enc, input_buf); + break; + } + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + enc_data = feature_encode<float>(enc, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(enc_data != nullptr); + annot_data(enc, std::move(enc_data)); + annot_domain(enc, loco::Domain::Feature); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp new file mode 100644 index 000000000..cd9d708dc --- /dev/null +++ b/compiler/locomotiv/src/Node/FilterEncode.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; + +/** @brief Produce the filter data for a FilterEncode node. The encoder of the node gives the filter shape for the input tensor shape, a buffer ordered as count, height, width, depth is allocated, and each element is copied from the input tensor index the encoder maps its filter index to. @tparam T element type of both buffers @param node FilterEncode node providing encoder() @param input_buf rank-4 tensor buffer; the rank is checked by assert only @return NodeData created from the filled buffer */ template <typename T> +std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *node, + const Buffer<T> *input_buf) +{ + auto encoder = node->encoder(); + + // Make TensorShape from input + loco::TensorShape input_shape; + input_shape.rank(input_buf->shape().rank()); + assert(input_shape.rank() == 4); + for (uint32_t i = 0; i < input_shape.rank(); ++i) + { + input_shape.dim(i) = input_buf->shape().dim(i); + } + + loco::FilterShape node_shape = encoder->shape(input_shape); + + // Make NHWC buffer from FilterShape + Buffer<T> node_buf = + make_buffer<T, LexicalLayout>(Shape{node_shape.count().value(), node_shape.height().value(), + node_shape.width().value(), node_shape.depth().value()}); + + // Copy buffer in an order arranged by encoder + for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance()) + { + loco::FilterIndex index; + index.nth() = e.current().at(0); + index.row() = e.current().at(1); + index.column() = e.current().at(2); + index.channel() = e.current().at(3); + + node_buf.at(e.current()) = input_buf->at(encoder->value(index)); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +/** @brief Execute a FilterEncode node: validates that the input has data, is annotated as Tensor and has rank 4, encodes S32 or FLOAT32 input, then annotates this node with the encoded data and the Filter domain. @param enc node to execute @throws std::runtime_error when the input data type is neither S32 nor FLOAT32. NOTE(review): failure behavior of validate() is defined in Validation.h, not visible here. */ void NodeExecution::execute(loco::FilterEncode *enc) +{ + auto input_data = annot_data(enc->input()); + + validate(input_data, "Input of FilterEncode not ready"); + validate(annot_domain(enc->input()) == loco::Domain::Tensor, + "Input of FilterEncode is not Tensor"); + validate(input_data->shape()->rank() == 4, "Input 
shape mismatch"); + + std::unique_ptr<NodeData> enc_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_buf = input_data->as_s32_bufptr(); + enc_data = filter_encode<int32_t>(enc, input_buf); + break; + } + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + enc_data = filter_encode<float>(enc, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(enc_data != nullptr); + annot_data(enc, std::move(enc_data)); + annot_domain(enc, loco::Domain::Filter); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/FilterEncode.test.cpp b/compiler/locomotiv/src/Node/FilterEncode.test.cpp new file mode 100644 index 000000000..79b8308e2 --- /dev/null +++ b/compiler/locomotiv/src/Node/FilterEncode.test.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <loco/IR/PermutingCodec.h> + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; + +/** @brief Encodes an S32 tensor laid out as NCHW with a permuting filter encoder and expects an NHWC filter: dtype, shape, every element and the Filter domain are checked. */ TEST(NodeExecution_FilterEncode, s32) +{ + const uint32_t N = 2; + const uint32_t H = 3; + const uint32_t W = 4; + const uint32_t C = 5; + + auto g = loco::make_graph(); + + // Pull + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::S32); + + // Make and assign "NCHW" data to pull node + auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{N, C, H, W}); + int32_t i = 1; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = i; + ++i; // Doesn't matter what it is + } + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + // Encoder to correctly read input tensor as NCHW + auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>( + new loco::PermutingEncoder<loco::Domain::Filter>); + encoder->perm()->axis(loco::FilterAxis::Count) = 0; + encoder->perm()->axis(loco::FilterAxis::Depth) = 1; + encoder->perm()->axis(loco::FilterAxis::Height) = 2; + encoder->perm()->axis(loco::FilterAxis::Width) = 3; + + // FilterEncode + auto enc = g->nodes()->create<loco::FilterEncode>(); + enc->input(pull); + enc->encoder(std::move(encoder)); + + locomotiv::NodeExecution::get().run(enc); + + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), 
loco::DataType::S32); + ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC + auto enc_buf = enc_data->as_s32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_EQ(pull_buf.at(Index{n, c, h, w}), enc_buf->at(Index{n, h, w, c})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter); +} + +/** @brief Encodes a FLOAT32 tensor laid out as CHNW with a permuting filter encoder and expects an NHWC filter: dtype, shape, every element and the Filter domain are checked. */ TEST(NodeExecution_FilterEncode, f32) +{ + const uint32_t N = 2; + const uint32_t H = 3; + const uint32_t W = 4; + const uint32_t C = 5; + + auto g = loco::make_graph(); + + // Pull + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + + // Make and assign crazy "CHNW" data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{C, H, N, W}); + float f = 1; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = f; + f += 0.1f; // Doesn't matter what it is + } + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + // Encoder to correctly read input tensor as CHNW + auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Filter>>( + new loco::PermutingEncoder<loco::Domain::Filter>); + encoder->perm()->axis(loco::FilterAxis::Depth) = 0; + encoder->perm()->axis(loco::FilterAxis::Height) = 1; + encoder->perm()->axis(loco::FilterAxis::Count) = 2; + encoder->perm()->axis(loco::FilterAxis::Width) = 3; + + // FilterEncode + auto enc = g->nodes()->create<loco::FilterEncode>(); + enc->input(pull); + enc->encoder(std::move(encoder)); + + locomotiv::NodeExecution::get().run(enc); + + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(enc_data->shape()), (Shape{N, H, W, C})); // locomotiv filter is NHWC + auto enc_buf = 
enc_data->as_f32_bufptr(); + for (uint32_t n = 0; n < N; ++n) + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + for (uint32_t c = 0; c < C; ++c) + ASSERT_FLOAT_EQ(pull_buf.at(Index{c, h, n, w}), enc_buf->at(Index{n, h, w, c})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Filter); +} diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp new file mode 100644 index 000000000..eb7d44a59 --- /dev/null +++ b/compiler/locomotiv/src/Node/Forward.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <stdexcept> +#include <cassert> + +namespace locomotiv +{ + +/** @brief Execute a Forward node: validates that the input has data and a domain other than Unknown, creates data for this node from the S32 or FLOAT32 buffer of the input via make_data(), and annotates this node with the same domain as the input. @param forward node to execute @throws std::runtime_error when the input data type is neither S32 nor FLOAT32. NOTE(review): failure behavior of validate() is defined in Validation.h, not visible here. */ void NodeExecution::execute(loco::Forward *forward) +{ + auto input_data = annot_data(forward->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(forward->input()) != loco::Domain::Unknown, + "Input domain must not Unknown"); + + std::unique_ptr<NodeData> forward_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_bufptr = input_data->as_s32_bufptr(); + forward_data = make_data(*input_bufptr); + break; + } + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + forward_data = make_data(*input_bufptr); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(forward_data != nullptr); + annot_data(forward, std::move(forward_data)); + annot_domain(forward, annot_domain(forward->input())); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Forward.test.cpp b/compiler/locomotiv/src/Node/Forward.test.cpp new file mode 100644 index 000000000..73d37139a --- /dev/null +++ b/compiler/locomotiv/src/Node/Forward.test.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** @brief Forwards a one-element S32 tensor and expects the same dtype, shape, value and the Tensor domain on the Forward node. */ TEST(NodeExecution_Forward, s32) +{ + // Make pull-forward graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::S32); + pull->shape({1}); + auto forward = g->nodes()->create<loco::Forward>(); + forward->input(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{1}); + pull_buf.at(Index{0}) = 42; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(forward); + + auto forward_data = locomotiv::annot_data(forward); + ASSERT_NE(forward_data, nullptr); + ASSERT_EQ(forward_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(forward_data->shape()), Shape{1}); + ASSERT_EQ(forward_data->as_s32_bufptr()->at(Index{0}), pull_buf.at(Index{0})); + + ASSERT_EQ(locomotiv::annot_domain(forward), loco::Domain::Tensor); +} + +/** @brief Forwards a one-element FLOAT32 tensor and expects the same dtype, shape, value and the Tensor domain on the Forward node. */ TEST(NodeExecution_Forward, f32) +{ + // Make pull-forward graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({1}); + auto forward = g->nodes()->create<loco::Forward>(); + forward->input(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1}); + pull_buf.at(Index{0}) = 3.14f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, 
loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(forward); + + auto forward_data = locomotiv::annot_data(forward); + ASSERT_NE(forward_data, nullptr); + ASSERT_EQ(forward_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(forward_data->shape()), Shape{1}); + ASSERT_FLOAT_EQ(forward_data->as_f32_bufptr()->at(Index{0}), pull_buf.at(Index{0})); + + ASSERT_EQ(locomotiv::annot_domain(forward), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp new file mode 100644 index 000000000..77b7315a9 --- /dev/null +++ b/compiler/locomotiv/src/Node/MatMul.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> +#include <stdexcept> + +namespace +{ +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Calculate Matrix Multiplication: returns a new buffer of shape (lhs dim 0, rhs dim 1) whose element (y, x) is the sum over axis of lhs(y, axis) * rhs(axis, x). Both inputs must be rank 2 with lhs dim 1 equal to rhs dim 0. NOTE(review): these preconditions are enforced by assert only, so the dimension match is unchecked when asserts are compiled out. + */ +template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buffer<T> *rhs_buf) +{ + const auto lhs_shape = lhs_buf->shape(); + const auto rhs_shape = rhs_buf->shape(); + + assert(lhs_shape.rank() == 2 && "lhs rank must be 2"); + assert(rhs_shape.rank() == 2 && "rhs rank must be 2"); + // lhs width should be the same as rhs height + assert(lhs_shape.dim(1) == rhs_shape.dim(0) && "height/width mismatch"); + + const uint32_t lhs_height = lhs_shape.dim(0); + const uint32_t lhs_width = lhs_shape.dim(1); + + const uint32_t rhs_width = rhs_shape.dim(1); + + const uint32_t output_height = lhs_height; + const uint32_t output_width = rhs_width; + + Shape output_shape{output_height, output_width}; + auto output_buf = make_buffer<T, LexicalLayout>(output_shape); + + for (uint32_t out_y = 0; out_y < output_height; ++out_y) + { + for (uint32_t out_x = 0; out_x < output_width; ++out_x) + { + T total = static_cast<T>(0); // accumulator + // Accumulate through axis + for (uint32_t axis = 0; axis < lhs_width; ++axis) + { + total += lhs_buf->at(Index({out_y, axis})) * rhs_buf->at(Index({axis, out_x})); + } + // Set output value + output_buf.at(Index({out_y, out_x})) = total; + } + } + + return output_buf; +} + +} // namespace + +namespace locomotiv +{ + +/** @brief Execute a MatMul node: validates that both operands have rank-2 data annotated as Matrix, multiplies them when both are FLOAT32 or both are S32, then annotates this node with the product and the Matrix domain. @param mat_mul node to execute @throws std::runtime_error for any other data type combination, including mixed S32 and FLOAT32 operands. NOTE(review): failure behavior of validate() is defined in Validation.h, not visible here. */ void NodeExecution::execute(loco::MatMul *mat_mul) +{ + 
auto lhs_data = annot_data(mat_mul->lhs()); + auto rhs_data = annot_data(mat_mul->rhs()); + + validate(lhs_data, "Can't find left matrix data of MatMul"); + validate(lhs_data->shape()->rank() == 2, "lhs rank must be 2"); + + validate(rhs_data, "Can't find right matrix data of MatMul"); + validate(rhs_data->shape()->rank() == 2, "rhs rank must be 2"); + + validate(annot_domain(mat_mul->lhs()) == loco::Domain::Matrix, + "Left matrix of MatMul is not a Matrix"); + validate(annot_domain(mat_mul->rhs()) == loco::Domain::Matrix, + "Right matrix of MatMul is not a Matrix"); + + std::unique_ptr<NodeData> mat_mul_result = nullptr; + + if (lhs_data->dtype() == loco::DataType::FLOAT32 && rhs_data->dtype() == loco::DataType::FLOAT32) + { + const auto lhs_buf = lhs_data->as_f32_bufptr(); + const auto rhs_buf = rhs_data->as_f32_bufptr(); + + auto mat_mul_buf = calc_mat_mul<float>(lhs_buf, rhs_buf); + + mat_mul_result = make_data(mat_mul_buf); + } + else if (lhs_data->dtype() == loco::DataType::S32 && rhs_data->dtype() == loco::DataType::S32) + { + const auto lhs_buf = lhs_data->as_s32_bufptr(); + const auto rhs_buf = rhs_data->as_s32_bufptr(); + + auto mat_mul_buf = calc_mat_mul<int32_t>(lhs_buf, rhs_buf); + + mat_mul_result = make_data(mat_mul_buf); + } + else + throw std::runtime_error("NYI for these DataTypes"); + + assert(mat_mul_result != nullptr); + + annot_data(mat_mul, std::move(mat_mul_result)); + annot_domain(mat_mul, loco::Domain::Matrix); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MatMul.test.cpp b/compiler/locomotiv/src/Node/MatMul.test.cpp new file mode 100644 index 000000000..bd480f7c7 --- /dev/null +++ b/compiler/locomotiv/src/Node/MatMul.test.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +/** + * @brief Run MatMul on two raw arrays and compare every output element with expected_output + * + * lhs and rhs are copied (through LexicalLayout overlays) into buffers that are attached as + * Matrix-domain annotations to two MatrixEncode nodes; this helper only calls run() on the + * MatMul node itself. + * + * @param lhs, rhs Input elements, read through overlays of lhs_shape / rhs_shape + * @param expected_output Expected elements, read through an overlay of out_shape + * @param expected_datatype Expected result type; FLOAT32 is compared with ASSERT_FLOAT_EQ, + * S32 with ASSERT_EQ, anything else throws std::runtime_error + */ +template <typename T> +void run_test(const T *lhs, const T *rhs, const T *expected_output, const Shape &lhs_shape, + const Shape &rhs_shape, const Shape &out_shape, loco::DataType expected_datatype) +{ + auto g = loco::make_graph(); + // Fill lhs MatrixEncode + auto lhs_enc = g->nodes()->create<loco::MatrixEncode>(); + { + auto lhs_enc_buf = make_buffer<T, LexicalLayout>(lhs_shape); + auto lhs_overlay = make_overlay<T, LexicalLayout>(lhs_shape, const_cast<T *>(lhs)); + for (nncc::core::ADT::tensor::IndexEnumerator e{lhs_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + lhs_enc_buf.at(ind) = lhs_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(lhs_enc_buf); + locomotiv::annot_data(lhs_enc, std::move(enc_data)); + locomotiv::annot_domain(lhs_enc, loco::Domain::Matrix); + } + // Fill rhs MatrixEncode + auto rhs_enc = g->nodes()->create<loco::MatrixEncode>(); + { + auto rhs_enc_buf = make_buffer<T, LexicalLayout>(rhs_shape); + auto rhs_overlay =
make_overlay<T, LexicalLayout>(rhs_shape, const_cast<T *>(rhs)); + for (nncc::core::ADT::tensor::IndexEnumerator e{rhs_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + rhs_enc_buf.at(ind) = rhs_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(rhs_enc_buf); + locomotiv::annot_data(rhs_enc, std::move(enc_data)); + locomotiv::annot_domain(rhs_enc, loco::Domain::Matrix); + } + + // build MatMul + auto mat_mul = g->nodes()->create<loco::MatMul>(); + mat_mul->lhs(lhs_enc); + mat_mul->rhs(rhs_enc); + + // run interpreter + locomotiv::NodeExecution::get().run(mat_mul); + + // get result of calculation + auto mat_mul_result = locomotiv::annot_data(mat_mul); + + // check the result + ASSERT_NE(mat_mul_result, nullptr); + ASSERT_TRUE(mat_mul_result->dtype() == expected_datatype); + ASSERT_TRUE(*(mat_mul_result->shape()) == out_shape); + + auto out_overlay = make_overlay<T, LexicalLayout>(out_shape, const_cast<T *>(expected_output)); + for (nncc::core::ADT::tensor::IndexEnumerator e{out_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + if (expected_datatype == loco::DataType::FLOAT32) + ASSERT_FLOAT_EQ(mat_mul_result->as_f32_bufptr()->at(ind), out_overlay.at(ind)); + else if (expected_datatype == loco::DataType::S32) + ASSERT_EQ(mat_mul_result->as_s32_bufptr()->at(ind), out_overlay.at(ind)); + else + throw std::runtime_error("NYI for these DataTypes"); + } + + ASSERT_EQ(locomotiv::annot_domain(mat_mul), loco::Domain::Matrix); +} + +} // namespace + +// clang-format off +/* from the code below: + +import numpy as np + +a = [[-0.48850584, 1.4292705, -1.3424522], + [1.7021934, -0.39246717, 0.6248314]] + +b = [[-0.0830195, 0.21088193, -0.11781317], + [0.07755677, 1.6337638, 1.0792778], + [-1.6922939, -1.5437212, 0.96667504]] + +print(np.array(a) @ np.array(b)) +*/ +TEST(NodeExecution_MatMul, f32_2x3_3x3) +{ + using nncc::core::ADT::tensor::Shape; + + const float lhs[] = + { + -0.48850584, 1.4292705, -1.3424522, + 1.7021934,
-0.39246717, 0.6248314 + }; + + const float rhs[] = + { + -0.0830195, 0.21088193, -0.11781317, + 0.07755677, 1.6337638, 1.0792778, + -1.6922939, -1.5437212, 0.96667504 + }; + + const float out[] = + { + 2.42322878, 4.30444527, 0.30241731, + -1.2291521, -1.2468023, -0.02011299 + }; + + run_test<float>(lhs, rhs, out, Shape{2, 3}, Shape{3, 3}, Shape{2, 3}, loco::DataType::FLOAT32); +} + +/* from the code below: + +import numpy as np + +a = np.random.randint(10000, size=(4, 2)) + +b = np.random.randint(10000, size=(2, 6)) + +print(a) +print(b) +print(np.array(a) @ np.array(b)) +*/ +TEST(NodeExecution_MatMul, s32_4x2_2x6) +{ + using nncc::core::ADT::tensor::Shape; + + const int32_t lhs[] = + { + 6392, 4993, + 54, 9037, + 3947, 5820, + 5800, 4181 + }; + + const int32_t rhs[] = + { + 2694, 8376, 8090, 1285, 7492, 1652, + 5427, 8798, 7634, 2229, 5439, 6999 + }; + + const int32_t out[] = + { + 44317059, 97467806, 89827842, 19343117, 75045791, 45505591, + 49189275, 79959830, 69425318, 20212863, 49556811, 63339171, + 42218358, 84264432, 76361110, 18044675, 61225904, 47254624, + 38315487, 85365238, 78839754, 16772449, 66194059, 38844419 + }; + + run_test<int32_t>(lhs, rhs, out, Shape{4, 2}, Shape{2, 6}, Shape{4, 6}, loco::DataType::S32); +} + +// clang-format on diff --git a/compiler/locomotiv/src/Node/MatrixCodec.test.cpp b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp new file mode 100644 index 000000000..8fc5d593b --- /dev/null +++ b/compiler/locomotiv/src/Node/MatrixCodec.test.cpp @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <loco/IR/PermutingCodec.h> + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::Buffer; + +// This file is intended to test MatrixEncode and MatrixDecode at once +namespace +{ + +/** + * @brief Fixture that owns a graph and builds Pull / MatrixEncode / MatrixDecode nodes in it + * + * pull_layer() annotates the Pull node directly with its data and the Tensor domain; + * the tests using this fixture only call run() on the encode and decode nodes. + */ +class NodeExecution_MatrixCodec : public ::testing::Test +{ +private: + loco::Graph g; + +protected: + /// @brief Make Pull node and set data by given buffer and data type + template <typename DT> loco::Pull *pull_layer(Buffer<DT> &pull_buf, loco::DataType dtype) + { + auto pull = g.nodes()->create<loco::Pull>(); + pull->dtype(dtype); + + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + return pull; + } + + /// @brief Make MatrixEncode node with given input and encoding permutation + loco::MatrixEncode *matrix_encode_layer(loco::Node *input, + const loco::Permutation<loco::Domain::Matrix> &perm) + { + auto encoder = std::unique_ptr<loco::PermutingEncoder<loco::Domain::Matrix>>( + new
loco::PermutingEncoder<loco::Domain::Matrix>); + + encoder->perm(perm); + + auto enc = g.nodes()->create<loco::MatrixEncode>(); + enc->input(input); + enc->encoder(std::move(encoder)); + + return enc; + } + + /// @brief Make MatrixDecode node with given input and decoding permutation + loco::MatrixDecode *matrix_decode_layer(loco::Node *input, + const loco::Permutation<loco::Domain::Matrix> &perm) + { + auto decoder = std::unique_ptr<loco::PermutingDecoder<loco::Domain::Matrix>>( + new loco::PermutingDecoder<loco::Domain::Matrix>); + + decoder->perm(perm); + + auto dec = g.nodes()->create<loco::MatrixDecode>(); + dec->input(input); + dec->decoder(std::move(decoder)); + + return dec; + } +}; + +} // namespace + +/** + * @brief Encode/decode round trip on S32 data with the Height=0, Width=1 permutation + * + * With this permutation both the encoded matrix and the decoded tensor are expected to + * equal the pulled buffer element by element. + */ +TEST_F(NodeExecution_MatrixCodec, HW_s32) +{ + const uint32_t H = 3; + const uint32_t W = 4; + + // Make HW data for pull node + auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{H, W}); + int32_t i = 0; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = i; + ++i; // Doesn't matter what it is + } + + // Make HW permutation for encoder and decoder + loco::Permutation<loco::Domain::Matrix> HW; + + HW.axis(loco::MatrixAxis::Height) = 0; + HW.axis(loco::MatrixAxis::Width) = 1; + + // Pull + auto pull = pull_layer(pull_buf, loco::DataType::S32); + + // MatrixEncode + auto enc = matrix_encode_layer(pull, HW); + locomotiv::NodeExecution::get().run(enc); + + // Test MatrixEncode + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(enc_data->shape()), (Shape{H, W})); // locomotiv matrix is HW + auto enc_buf = enc_data->as_s32_bufptr(); + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + ASSERT_EQ(pull_buf.at(Index{h, w}), enc_buf->at(Index{h, w})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Matrix); + + // MatrixDecode + auto dec = matrix_decode_layer(enc, HW); +
locomotiv::NodeExecution::get().run(dec); + + // Test MatrixDecode: Encode -> Decode == identity + auto dec_data = locomotiv::annot_data(dec); + ASSERT_NE(dec_data, nullptr); + ASSERT_EQ(dec_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(dec_data->shape()), (Shape{H, W})); + auto dec_buf = dec_data->as_s32_bufptr(); + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + ASSERT_EQ(pull_buf.at(Index{h, w}), dec_buf->at(Index{h, w})); + + ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor); +} + +/** + * @brief Encode/decode round trip on FLOAT32 data with the Width=0, Height=1 permutation + * + * The pulled tensor has shape {W, H}; the encoded matrix is expected to be its {H, W} + * transpose, and decoding is expected to restore the original {W, H} tensor. + */ +TEST_F(NodeExecution_MatrixCodec, WH_f32) +{ + const uint32_t W = 6; + const uint32_t H = 5; + + // Make crazy WH data for pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{W, H}); + float f = 0.0f; + for (IndexEnumerator e{pull_buf.shape()}; e.valid(); e.advance()) + { + pull_buf.at(e.current()) = f; + f += 0.1f; // Doesn't matter what it is + } + + // Make WH permutation for encoder and decoder + loco::Permutation<loco::Domain::Matrix> WH; + + WH.axis(loco::MatrixAxis::Width) = 0; + WH.axis(loco::MatrixAxis::Height) = 1; + + // Pull + auto pull = pull_layer(pull_buf, loco::DataType::FLOAT32); + + // MatrixEncode + auto enc = matrix_encode_layer(pull, WH); + locomotiv::NodeExecution::get().run(enc); + + // Test MatrixEncode + auto enc_data = locomotiv::annot_data(enc); + ASSERT_NE(enc_data, nullptr); + ASSERT_EQ(enc_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(enc_data->shape()), (Shape{H, W})); // locomotiv matrix is HW + auto enc_buf = enc_data->as_f32_bufptr(); + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + ASSERT_FLOAT_EQ(pull_buf.at(Index{w, h}), enc_buf->at(Index{h, w})); + + ASSERT_EQ(locomotiv::annot_domain(enc), loco::Domain::Matrix); + + // MatrixDecode + auto dec = matrix_decode_layer(enc, WH); + locomotiv::NodeExecution::get().run(dec); + + // Test MatrixDecode: Encode -> Decode == identity + auto dec_data = locomotiv::annot_data(dec); + ASSERT_NE(dec_data, nullptr); +
ASSERT_EQ(dec_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(dec_data->shape()), (Shape{W, H})); + auto dec_buf = dec_data->as_f32_bufptr(); + for (uint32_t h = 0; h < H; ++h) + for (uint32_t w = 0; w < W; ++w) + ASSERT_FLOAT_EQ(pull_buf.at(Index{w, h}), dec_buf->at(Index{w, h})); + + ASSERT_EQ(locomotiv::annot_domain(dec), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp new file mode 100644 index 000000000..c591676ae --- /dev/null +++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::Index; + +/** + * @brief Decode a rank-2 (height, width) matrix buffer into a rank-2 tensor buffer + * + * The node's decoder supplies the output tensor shape and, for every output tensor index, + * the matrix index whose element is copied into that position. + * + * @param node MatrixDecode node providing the decoder + * @param input_buf Matrix data; its rank is only asserted to be 2 + * @return Newly made NodeData holding the decoded tensor + */ +template <typename T> +std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *node, + const Buffer<T> *input_buf) +{ + auto decoder = node->decoder(); + + // Make MatrixShape from input.
Note that matrix in locomotiv is represented as HW + loco::MatrixShape input_shape; + assert(input_buf->shape().rank() == 2); + input_shape.height() = input_buf->shape().dim(0); + input_shape.width() = input_buf->shape().dim(1); + + loco::TensorShape node_shape = decoder->shape(input_shape); + + // Make tensor buffer from TensorShape + Buffer<T> node_buf = + make_buffer<T, LexicalLayout>(Shape{node_shape.dim(0).value(), node_shape.dim(1).value()}); + + // Copy buffer in an order arranged by decoder + for (IndexEnumerator e{node_buf.shape()}; e.valid(); e.advance()) + { + loco::MatrixIndex matrix_index = decoder->value(e.current()); + Index buf_index({matrix_index.row(), matrix_index.column()}); + + node_buf.at(e.current()) = input_buf->at(buf_index); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +/** + * @brief Decode the Matrix-domain input of a MatrixDecode node into Tensor-domain data + * + * The input must already be annotated with rank-2 data in the Matrix domain. S32 and + * FLOAT32 are handled; any other element type throws std::runtime_error. The node is + * annotated with the decoded data and with the Tensor domain. + */ +void NodeExecution::execute(loco::MatrixDecode *matrix_dec) +{ + auto input_data = annot_data(matrix_dec->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(matrix_dec->input()) == loco::Domain::Matrix, + "Input domain should be Matrix"); + validate(input_data->shape()->rank() == 2, "Input data rank must be 2"); + + std::unique_ptr<NodeData> matrix_dec_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_buf = input_data->as_s32_bufptr(); + matrix_dec_data = matrix_decode<int32_t>(matrix_dec, input_buf); + break; + } + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + matrix_dec_data = matrix_decode<float>(matrix_dec, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(matrix_dec_data != nullptr); + + annot_data(matrix_dec, std::move(matrix_dec_data)); + annot_domain(matrix_dec, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MatrixEncode.cpp b/compiler/locomotiv/src/Node/MatrixEncode.cpp new file mode 100644 index
000000000..e3554e15a --- /dev/null +++ b/compiler/locomotiv/src/Node/MatrixEncode.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> + +#include <stdexcept> +#include <cassert> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::IndexEnumerator; + +/** + * @brief Encode a rank-2 tensor buffer into a (height, width) matrix buffer + * + * The node's encoder supplies the output matrix shape and, for every output (row, column) + * index, the tensor index whose element is copied into that position. + * + * @param node MatrixEncode node providing the encoder + * @param input_buf Tensor data; its rank is only asserted to be 2 + * @return Newly made NodeData holding the encoded matrix + */ +template <typename T> +std::unique_ptr<locomotiv::NodeData> matrix_encode(const loco::MatrixEncode *node, + const Buffer<T> *input_buf) +{ + auto encoder = node->encoder(); + + // Make TensorShape from input + loco::TensorShape input_shape; + input_shape.rank(input_buf->shape().rank()); + assert(input_shape.rank() == 2); + for (uint32_t i = 0; i < input_shape.rank(); ++i) + { + input_shape.dim(i) = input_buf->shape().dim(i); + } + + loco::MatrixShape node_shape = encoder->shape(input_shape); + + // Make HW buffer from MatrixShape + Buffer<T> node_buf = + make_buffer<T, LexicalLayout>(Shape{node_shape.height().value(), node_shape.width().value()}); + + // Copy buffer in an order arranged by encoder + for (IndexEnumerator e{node_buf.shape()};
e.valid(); e.advance()) + { + loco::MatrixIndex index; + index.row() = e.current().at(0); + index.column() = e.current().at(1); + + node_buf.at(e.current()) = input_buf->at(encoder->value(index)); + } + + return locomotiv::make_data(node_buf); +} + +} // namespace + +namespace locomotiv +{ + +/** + * @brief Encode the Tensor-domain input of a MatrixEncode node into Matrix-domain data + * + * The input must already be annotated with rank-2 data in the Tensor domain. S32 and + * FLOAT32 are handled; any other element type throws std::runtime_error. The node is + * annotated with the encoded data and with the Matrix domain. + */ +void NodeExecution::execute(loco::MatrixEncode *matrix_enc) +{ + auto input_data = annot_data(matrix_enc->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(matrix_enc->input()) == loco::Domain::Tensor, + "Input domain should be Tensor"); + validate(input_data->shape()->rank() == 2, "Input data rank must be 2"); + + std::unique_ptr<NodeData> matrix_enc_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_buf = input_data->as_s32_bufptr(); + matrix_enc_data = matrix_encode<int32_t>(matrix_enc, input_buf); + break; + } + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + matrix_enc_data = matrix_encode<float>(matrix_enc, input_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(matrix_enc_data != nullptr); + + annot_data(matrix_enc, std::move(matrix_enc_data)); + annot_domain(matrix_enc, loco::Domain::Matrix); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp new file mode 100644 index 000000000..5d92f89f5 --- /dev/null +++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <limits> +#include <cassert> +#include <algorithm> +#include <stdexcept> + +namespace +{ + +/** + * @brief Compute 1D output size based on given 1D arguments. + * + * @param image_size Input extent along this axis + * @param whole_pad Sum of front and back pad + * @param filter_size Window extent along this axis + * @param stride Step between consecutive windows + * @return (image_size + whole_pad - filter_size) / stride + 1 + * + * @note NOTE(review): the arithmetic is unsigned, so filter_size > image_size + whole_pad + * wraps around, and stride == 0 divides by zero; neither case is rejected explicitly. + * The assert only requires that the stride divides the remaining extent evenly. + */ +inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size, + uint32_t stride) +{ + assert((image_size + whole_pad - filter_size) % stride == 0); + return (image_size + whole_pad - filter_size) / stride + 1; +} + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Compute 2D max pooling over a rank-4 buffer indexed as (batch, y, x, channel) + * + * Window, stride and pad are read from the node. For each output position the window is + * clamped to the input bounds, so padded positions are skipped rather than read, and the + * maximum starts from std::numeric_limits<T>::lowest(). + * + * @param maxpool2d Node providing window, stride and pad + * @param ifm_buf Input feature map + * @return Buffer of shape {batches, output_height, output_width, depth} + */ +template <typename T> +nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d, + const Buffer<T> *ifm_buf) +{ + auto ifm_shape = ifm_buf->shape(); + + const uint32_t batches = ifm_shape.dim(0); + const uint32_t depth = ifm_shape.dim(3); + + const uint32_t ifm_height = ifm_shape.dim(1); + const uint32_t ifm_width = ifm_shape.dim(2); + + const uint32_t window_height = maxpool2d->window()->vertical(); + const uint32_t window_width = maxpool2d->window()->horizontal(); + +
const uint32_t stride_height = maxpool2d->stride()->vertical(); + const uint32_t stride_width = maxpool2d->stride()->horizontal(); + + const uint32_t pad_top = maxpool2d->pad()->top(); + const uint32_t pad_bottom = maxpool2d->pad()->bottom(); + + const uint32_t pad_left = maxpool2d->pad()->left(); + const uint32_t pad_right = maxpool2d->pad()->right(); + + const uint32_t output_height = + compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height); + const uint32_t output_width = + compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width); + + // prepare output buffer + Shape output_shape{batches, output_height, output_width, depth}; + auto output_buf = make_buffer<T, LexicalLayout>(output_shape); + + for (uint32_t batch = 0; batch < batches; ++batch) + { + for (uint32_t out_y = 0; out_y < output_height; ++out_y) + { + for (uint32_t out_x = 0; out_x < output_width; ++out_x) + { + for (uint32_t channel = 0; channel < depth; ++channel) + { + const int in_x_origin = (out_x * stride_width) - pad_left; + const int in_y_origin = (out_y * stride_height) - pad_top; + + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array.
+ const uint32_t filter_x_start = std::max(0, -in_x_origin); + const uint32_t filter_x_end = std::min(window_width, ifm_width - in_x_origin); + + const uint32_t filter_y_start = std::max(0, -in_y_origin); + const uint32_t filter_y_end = std::min(window_height, ifm_height - in_y_origin); + + T max = std::numeric_limits<T>::lowest(); + + for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) + { + const uint32_t in_x = in_x_origin + filter_x; + const uint32_t in_y = in_y_origin + filter_y; + max = std::max(max, ifm_buf->at(Index({batch, in_y, in_x, channel}))); + } + } + + output_buf.at(Index({batch, out_y, out_x, channel})) = max; + } + } + } + } + + return output_buf; +} + +} // namespace + +namespace locomotiv +{ + +/** + * @brief Run max pooling on the Feature-domain input of a MaxPool2D node + * + * The input must already be annotated with rank-4 data in the Feature domain. Only + * FLOAT32 is handled; any other element type throws std::runtime_error. The node is + * annotated with the pooled data and with the Feature domain. + */ +void NodeExecution::execute(loco::MaxPool2D *maxpool2d) +{ + auto ifm_data = annot_data(maxpool2d->ifm()); + + validate(ifm_data, "Can't find input data of MaxPool2D"); + validate(ifm_data->shape()->rank() == 4, "IFM rank should be 4"); + validate(annot_domain(maxpool2d->ifm()) == loco::Domain::Feature, + "ifm of MaxPool2D is not Feature"); + + std::unique_ptr<NodeData> maxpool2d_data = nullptr; + + switch (ifm_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto ifm_buf = ifm_data->as_f32_bufptr(); + + auto maxpool2d_buf = maxPool2D<float>(maxpool2d, ifm_buf); + + maxpool2d_data = make_data(maxpool2d_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(maxpool2d_data != nullptr); + + annot_data(maxpool2d, std::move(maxpool2d_data)); + annot_domain(maxpool2d, loco::Domain::Feature); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/MaxPool2D.test.cpp b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp new file mode 100644 index 000000000..9d877a96b --- /dev/null +++ b/compiler/locomotiv/src/Node/MaxPool2D.test.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2019 Samsung
Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +/** + * @brief Run MaxPool2D on a raw float array and compare every output element with expected_ofm + * + * ifm is copied (through a LexicalLayout overlay) into a buffer that is attached as a + * Feature-domain annotation to a FeatureEncode node; this helper only calls run() on the + * MaxPool2D node itself. + * + * @param ifm, expected_ofm Input and expected output elements, read through overlays + * of ifm_shape / ofm_shape + * @param window_v, window_h Vertical / horizontal window size + * @param stride_v, stride_h Vertical / horizontal stride + * @param pad_top, pad_bottom, pad_left, pad_right Padding on each side + */ +void run_test(const float *ifm, const float *expected_ofm, const Shape &ifm_shape, + const Shape &ofm_shape, const uint32_t window_v, const uint32_t window_h, + const uint32_t stride_v, const uint32_t stride_h, const uint32_t pad_top, + const uint32_t pad_bottom, const uint32_t pad_left, const uint32_t pad_right) +{ + // Let's make FeatureEncode-MaxPool2D graph + auto g = loco::make_graph(); + auto enc = g->nodes()->create<loco::FeatureEncode>(); + + // Fill output data of FeatureEncode from ifm + auto enc_buf = make_buffer<float, LexicalLayout>(ifm_shape); + + auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + enc_buf.at(ind) =
ifm_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(enc_buf); + locomotiv::annot_data(enc, std::move(enc_data)); + locomotiv::annot_domain(enc, loco::Domain::Feature); + + // build MaxPool2D + auto maxpool2d = g->nodes()->create<loco::MaxPool2D>(); + maxpool2d->ifm(enc); + maxpool2d->window()->vertical(window_v); + maxpool2d->window()->horizontal(window_h); + maxpool2d->stride()->vertical(stride_v); + maxpool2d->stride()->horizontal(stride_h); + maxpool2d->pad()->top(pad_top); + maxpool2d->pad()->bottom(pad_bottom); + maxpool2d->pad()->left(pad_left); + maxpool2d->pad()->right(pad_right); + + // run interpreter + locomotiv::NodeExecution::get().run(maxpool2d); + + // get result of calculation + auto maxpool2d_data = locomotiv::annot_data(maxpool2d); + + // check the result + ASSERT_NE(maxpool2d_data, nullptr); + ASSERT_TRUE(maxpool2d_data->dtype() == loco::DataType::FLOAT32); + ASSERT_TRUE(*(maxpool2d_data->shape()) == ofm_shape); + + auto ofm_overlay = + make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ASSERT_FLOAT_EQ(maxpool2d_data->as_f32_bufptr()->at(ind), ofm_overlay.at(ind)); + } + + ASSERT_EQ(locomotiv::annot_domain(maxpool2d), loco::Domain::Feature); +} + +} // namespace + +// clang-format off +/* ifm and ofm are from the code below: + + value = tf.random_normal([1, 3, 3, 1], stddev=1) + maxpool = tf.nn.max_pool(value, ksize = [1, 2, 2, 1], strides = [1, 1, 1, 1], padding= 'VALID', + data_format="NHWC") + with tf.Session() as sess: + print(sess.run(maxpool)) +*/ + +TEST(NodeExecution_MaxPool2D, f32_1x3x3x1_calculation) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + -1.5510627, 0.3653609, 1.9002001, + -0.15861237, -0.32944828, 1.2053918, + 0.50054574, -0.8533826, 0.131492, + }; + + const float ofm[] = + { + 0.3653609, 1.9002001, + 0.50054574, 1.2053918 + }; + +
run_test(ifm, ofm, + Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // input shape , output shape + 2, 2, // kernel + 1, 1, // stride + 0, 0, 0, 0 // padding + ); +} + +/* 5x5 input, 3x3 window, stride 2 and one cell of padding on every side: + each expected value is the largest input element covered by the clamped window */ +TEST(NodeExecution_MaxPool2D, with_padding) +{ + using nncc::core::ADT::tensor::Shape; + + const float ifm[] = + { + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25 + }; + + const float ofm[] = + { + 7, 9, 10, + 17, 19, 20, + 22, 24, 25 + }; + + run_test(ifm, ofm, + Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, // input shape , output shape + 3, 3, // kernel + 2, 2, // stride + 1, 1, 1, 1 // padding - this mimics SAME padding + ); +} +// clang-format on diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp new file mode 100644 index 000000000..c482d8b04 --- /dev/null +++ b/compiler/locomotiv/src/Node/Pull.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "NodeExecution.h" + +#include "UserData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <cassert> +#include <stdexcept> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Pull *pull) +{ +// TODO Remove deprecated code +#if 0 + validate(annot_data(pull), "Data for Pull is not ready"); + + validate(annot_domain(pull) == loco::Domain::Tensor, "Domain for Pull is not Tensor"); + + // DO NOTHING +#endif + + auto input_data = user_data(pull); + + validate(input_data, "Input not ready"); + // User always passes a "Tensor" + + std::unique_ptr<NodeData> pull_data = nullptr; + + // Q. Is it possible to use generic one? + switch (input_data->dtype()) + { + case loco::DataType::S32: + { + auto input_bufptr = input_data->as_s32_bufptr(); + pull_data = make_data(*input_bufptr); + break; + } + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + pull_data = make_data(*input_bufptr); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(pull_data != nullptr); + annot_data(pull, std::move(pull_data)); + annot_domain(pull, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Pull.test.cpp b/compiler/locomotiv/src/Node/Pull.test.cpp new file mode 100644 index 000000000..53e78776b --- /dev/null +++ b/compiler/locomotiv/src/Node/Pull.test.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "UserData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Pull, check_data_ready) +{ + // Make graph with Pull node only + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + + // Data not ready yet + ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull)); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1}); + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::user_data(pull, std::move(pull_data)); + +// The behavior of Pull is now consistent with that of other nodes. +// - annot_data and annot_domain is available after evaluating that "pull" node. +// TODO Remove this +#if 0 + // Domain not ready yet + ASSERT_ANY_THROW(locomotiv::NodeExecution::get().run(pull)); + + // Set Domain + locomotiv::annot_domain(pull, loco::Domain::Tensor); +#endif + + // Valid run + ASSERT_NO_THROW(locomotiv::NodeExecution::get().run(pull)); +} diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp new file mode 100644 index 000000000..fc5808b15 --- /dev/null +++ b/compiler/locomotiv/src/Node/Push.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <stdexcept> +#include <cassert> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Push *push) +{ + auto from_data = annot_data(push->from()); + + validate(from_data, "Ingredient not ready"); + validate(annot_domain(push->from()) == loco::Domain::Tensor, "Ingredient of Push is not tensor"); + + std::unique_ptr<NodeData> push_data = nullptr; + + switch (from_data->dtype()) + { + case loco::DataType::S32: + { + auto from_bufptr = from_data->as_s32_bufptr(); + push_data = make_data(*from_bufptr); + break; + } + case loco::DataType::FLOAT32: + { + auto from_bufptr = from_data->as_f32_bufptr(); + push_data = make_data(*from_bufptr); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(push_data != nullptr); + annot_data(push, std::move(push_data)); + annot_domain(push, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Push.test.cpp b/compiler/locomotiv/src/Node/Push.test.cpp new file mode 100644 index 000000000..be8f1e4e9 --- /dev/null +++ b/compiler/locomotiv/src/Node/Push.test.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Push, s32) +{ + // Make pull-push graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::S32); + pull->shape({1}); + auto push = g->nodes()->create<loco::Push>(); + push->from(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<int32_t, LexicalLayout>(Shape{1}); + pull_buf.at(Index{0}) = 42; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(push); + + auto push_data = locomotiv::annot_data(push); + ASSERT_NE(push_data, nullptr); + ASSERT_EQ(push_data->dtype(), loco::DataType::S32); + ASSERT_EQ(*(push_data->shape()), Shape{1}); + ASSERT_EQ(push_data->as_s32_bufptr()->at(Index{0}), pull_buf.at(Index{0})); + + ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor); +} + +TEST(NodeExecution_Push, f32) +{ + // Make pull-push graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + 
pull->dtype(loco::DataType::FLOAT32); + pull->shape({1}); + auto push = g->nodes()->create<loco::Push>(); + push->from(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1}); + pull_buf.at(Index{0}) = 3.14f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(push); + + auto push_data = locomotiv::annot_data(push); + ASSERT_NE(push_data, nullptr); + ASSERT_EQ(push_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(push_data->shape()), Shape{1}); + ASSERT_FLOAT_EQ(push_data->as_f32_bufptr()->at(Index{0}), pull_buf.at(Index{0})); + + ASSERT_EQ(locomotiv::annot_domain(push), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/ReLU.cpp b/compiler/locomotiv/src/Node/ReLU.cpp new file mode 100644 index 000000000..c0f8620e7 --- /dev/null +++ b/compiler/locomotiv/src/Node/ReLU.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +namespace +{ + +inline float relu_ew(float val) { return val > 0.0f ? 
val : 0.0f; } + +struct Func final : public locomotiv::UnaryFunc +{ + float apply(float v) const final { return relu_ew(v); } +}; + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::ReLU *relu) +{ + Func f; + + eltwise_unary(relu, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/ReLU.test.cpp b/compiler/locomotiv/src/Node/ReLU.test.cpp new file mode 100644 index 000000000..0ddd01d0f --- /dev/null +++ b/compiler/locomotiv/src/Node/ReLU.test.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_ReLU, f32) +{ + // Make pull-relu graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({2}); + auto relu = g->nodes()->create<loco::ReLU>(); + relu->input(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2}); + pull_buf.at(Index{0}) = -10.0f; + pull_buf.at(Index{1}) = 10.0f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(relu); + + auto relu_data = locomotiv::annot_data(relu); + ASSERT_NE(relu_data, nullptr); + ASSERT_EQ(relu_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(relu_data->shape()), Shape{2}); + ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{0}), 0.0f); + ASSERT_FLOAT_EQ(relu_data->as_f32_bufptr()->at(Index{1}), 10.0f); + + ASSERT_EQ(locomotiv::annot_domain(relu), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/ReLU6.cpp b/compiler/locomotiv/src/Node/ReLU6.cpp new file mode 100644 index 000000000..586c015fc --- /dev/null +++ b/compiler/locomotiv/src/Node/ReLU6.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +// TODO Remove deprecated code +#if 0 +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +#include <cassert> +#include <stdexcept> +#endif + +namespace +{ + +inline float relu6_ew(float val) { return val < 0.0f ? 0.0f : (val < 6.0f ? 
val : 6.0f); } + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::ReLU6 *relu6) +{ +// TODO Remove deprecated code +#if 0 + auto input_data = annot_data(relu6->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(relu6->input()) != loco::Domain::Unknown, + "Input domain of ReLU is Unknown"); + + std::unique_ptr<NodeData> relu6_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + auto *shape = input_data->shape(); + auto relu6_buf = make_buffer<float, LexicalLayout>(*shape); + + for (IndexEnumerator e{*shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + relu6_buf.at(index) = relu6_ew(input_bufptr->at(index)); + } + + relu6_data = make_data(relu6_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(relu6_data != nullptr); + annot_data(relu6, std::move(relu6_data)); + annot_domain(relu6, annot_domain(relu6->input())); +#endif + + struct Func final : public UnaryFunc + { + float apply(float v) const final { return relu6_ew(v); } + }; + + Func f; + + eltwise_unary(relu6, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/ReLU6.test.cpp b/compiler/locomotiv/src/Node/ReLU6.test.cpp new file mode 100644 index 000000000..07f6af23f --- /dev/null +++ b/compiler/locomotiv/src/Node/ReLU6.test.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_ReLU6, f32) +{ + // Make pull-relu6 graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({2, 2}); + auto relu6 = g->nodes()->create<loco::ReLU6>(); + relu6->input(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{2, 2}); + pull_buf.at(Index{0, 0}) = -5.0f; + pull_buf.at(Index{0, 1}) = 6.0f; + pull_buf.at(Index{1, 0}) = 7.0f; + pull_buf.at(Index{1, 1}) = -8.0f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(relu6); + + auto relu6_data = locomotiv::annot_data(relu6); + ASSERT_NE(relu6_data, nullptr); + ASSERT_EQ(relu6_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(relu6_data->shape()), Shape({2, 2})); + ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f); + ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{0, 1}), 6.0f); + ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 0}), 6.0f); + ASSERT_FLOAT_EQ(relu6_data->as_f32_bufptr()->at(Index{1, 1}), 0.0f); + + ASSERT_EQ(locomotiv::annot_domain(relu6), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp new file mode 100644 index 000000000..ac1672024 --- 
/dev/null +++ b/compiler/locomotiv/src/Node/Reshape.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::num_elements; + +#include <cassert> +#include <stdexcept> +#include <cstring> +#include <vector> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape) +{ + auto input_data = annot_data(reshape->input()); + + validate(input_data, "Input not ready"); + validate(annot_domain(reshape->input()) == loco::Domain::Tensor, + "Input domain of Reshape is not Tensor"); + + std::unique_ptr<NodeData> reshape_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + auto *input_shape = input_data->shape(); + + using Shape = nncc::core::ADT::tensor::Shape; + std::unique_ptr<Shape> output_shape(new Shape()); + + 
output_shape->resize(reshape->rank()); + for (uint32_t axis = 0; axis < output_shape->rank(); ++axis) + { + output_shape->dim(axis) = reshape->dim(axis).value(); + } + + auto reshape_bufptr = make_buffer<float, LexicalLayout>(*output_shape); + + float *input_ptr = const_cast<float *>(input_bufptr->base()); + uint64_t input_len = num_elements(*input_shape) * sizeof(float); + + float *output_ptr = reshape_bufptr.base(); + + assert(input_len == num_elements(*output_shape) * sizeof(float)); + memcpy(output_ptr, input_ptr, input_len); + + reshape_data = make_data(reshape_bufptr); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(reshape_data != nullptr); + annot_data(reshape, std::move(reshape_data)); + annot_domain(reshape, annot_domain(reshape->input())); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Reshape.test.cpp b/compiler/locomotiv/src/Node/Reshape.test.cpp new file mode 100644 index 000000000..8e54a16df --- /dev/null +++ b/compiler/locomotiv/src/Node/Reshape.test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Reshape, f32) +{ + // Make pull-reshape graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({4}); + auto reshape = g->nodes()->create<loco::Reshape<loco::ReshapeType::Fixed>>(); + reshape->input(pull); + reshape->shape({2, 2}); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{4}); + pull_buf.at(Index{0}) = 0.0f; + pull_buf.at(Index{1}) = 1.1f; + pull_buf.at(Index{2}) = 2.2f; + pull_buf.at(Index{3}) = 3.3f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(reshape); + + auto reshape_data = locomotiv::annot_data(reshape); + ASSERT_NE(reshape_data, nullptr); + ASSERT_EQ(reshape_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(reshape_data->shape()), (Shape{2, 2})); + ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 0}), 0.0f); + ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{0, 1}), 1.1f); + ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 0}), 2.2f); + ASSERT_FLOAT_EQ(reshape_data->as_f32_bufptr()->at(Index{1, 1}), 3.3f); + + ASSERT_EQ(locomotiv::annot_domain(reshape), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp new file mode 100644 index 000000000..352598b27 --- /dev/null +++ 
b/compiler/locomotiv/src/Node/Softmax.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Shape; + +#include <cassert> +#include <stdexcept> +#include <cmath> + +namespace +{ + +Index reduce_index(const Index &index, uint32_t axis) +{ + Index r_index; + + r_index.resize(index.rank()); + for (uint32_t i = 0; i < index.rank(); ++i) + r_index.at(i) = index.at(i); + r_index.at(axis) = 0; + + return r_index; +} + +Shape reduce_shape(const Shape &shape, uint32_t axis) +{ + Shape r_shape; + + r_shape.resize(shape.rank()); + for (uint32_t i = 0; i < shape.rank(); ++i) + r_shape.dim(i) = shape.dim(i); + r_shape.dim(axis) = 1; + + return r_shape; +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorSoftmax *softmax) +{ + auto input_data = annot_data(softmax->input()); + + validate(input_data, 
"Input not ready"); + validate(annot_domain(softmax->input()) == loco::Domain::Tensor, + "Input domain of TensorSoftmax is not Tensor"); + + std::unique_ptr<NodeData> softmax_data = nullptr; + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto axis = softmax->axis(); + + auto *input_shape = input_data->shape(); + auto input_bufptr = input_data->as_f32_bufptr(); + auto softmax_buf = make_buffer<float, LexicalLayout>(*input_data->shape()); + + auto reduce_sum_shape = reduce_shape(*input_shape, axis); + auto reduce_sum_bufptr = make_buffer<float, LexicalLayout>(reduce_sum_shape); + + for (IndexEnumerator e{*input_shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + const auto r_index = reduce_index(index, axis); + + reduce_sum_bufptr.at(r_index) += exp(input_bufptr->at(index)); + } + + for (IndexEnumerator e{*input_shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + const auto r_index = reduce_index(index, axis); + + softmax_buf.at(index) = exp(input_bufptr->at(index)) / reduce_sum_bufptr.at(r_index); + } + + softmax_data = make_data(softmax_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(softmax_data != nullptr); + annot_data(softmax, std::move(softmax_data)); + annot_domain(softmax, annot_domain(softmax->input())); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Softmax.test.cpp b/compiler/locomotiv/src/Node/Softmax.test.cpp new file mode 100644 index 000000000..21d240275 --- /dev/null +++ b/compiler/locomotiv/src/Node/Softmax.test.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Softmax, f32) +{ + // Make pull-softmax graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({2, 2}); + auto softmax = g->nodes()->create<loco::TensorSoftmax>(); + softmax->input(pull); + softmax->axis(1); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>({2, 2}); + pull_buf.at(Index{0, 0}) = 1.1f; + pull_buf.at(Index{0, 1}) = 1.1f; + pull_buf.at(Index{1, 0}) = 3.3f; + pull_buf.at(Index{1, 1}) = 3.3f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(softmax); + + auto kShape = Shape{2, 2}; + auto softmax_data = locomotiv::annot_data(softmax); + ASSERT_NE(softmax_data, nullptr); + ASSERT_EQ(softmax_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(softmax_data->shape()), kShape); + ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{0, 0}), 0.5f); + 
ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{0, 1}), 0.5f); + ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{1, 0}), 0.5f); + ASSERT_FLOAT_EQ(softmax_data->as_f32_bufptr()->at(Index{1, 1}), 0.5f); + + ASSERT_EQ(locomotiv::annot_domain(softmax), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/Tanh.cpp b/compiler/locomotiv/src/Node/Tanh.cpp new file mode 100644 index 000000000..78d329e7c --- /dev/null +++ b/compiler/locomotiv/src/Node/Tanh.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include <cmath> + +namespace +{ + +struct Func final : public locomotiv::UnaryFunc +{ + float apply(float v) const final { return std::tanh(v); } +}; + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::Tanh *tanh) +{ + Func f; + + eltwise_unary(tanh, f); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/Tanh.test.cpp b/compiler/locomotiv/src/Node/Tanh.test.cpp new file mode 100644 index 000000000..78c3a13ba --- /dev/null +++ b/compiler/locomotiv/src/Node/Tanh.test.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Tanh, f32) +{ + // Make pull-Tanh graph + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({3}); + auto tanh = g->nodes()->create<loco::Tanh>(); + tanh->input(pull); + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{3}); + pull_buf.at(Index{0}) = 0.0f; + pull_buf.at(Index{1}) = 1.0f; + pull_buf.at(Index{2}) = -1.0f; + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(tanh); + + auto tanh_data = locomotiv::annot_data(tanh); + ASSERT_NE(tanh_data, nullptr); + ASSERT_EQ(tanh_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(tanh_data->shape()), Shape{3}); + ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{0}), 0.0f); + ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{1}), 0.761594f); + ASSERT_FLOAT_EQ(tanh_data->as_f32_bufptr()->at(Index{2}), -0.761594f); + + 
ASSERT_EQ(locomotiv::annot_domain(tanh), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp new file mode 100644 index 000000000..010ca6821 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Shape; + +#include <cassert> +#include <stdexcept> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast) +{ + auto input_data = annot_data(tensor_broadcast->input()); + + // Calculate output shape + Shape input_shape = *(input_data->shape()); + + // TODO Reuse "ShapeInferenceService" + Shape output_shape; + + output_shape.resize(input_shape.rank()); + for (uint32_t axis = 0; axis < input_shape.rank(); ++axis) + { + if (tensor_broadcast->mapping()->defined(axis)) + { 
+ assert(input_shape.dim(axis) == 1); // Required by TensorBroadcast definition + output_shape.dim(axis) = tensor_broadcast->mapping()->dim(axis).value(); + } + else + { + output_shape.dim(axis) = input_shape.dim(axis); + } + } + + assert(input_shape.rank() == output_shape.rank()); + + uint32_t const rank = input_shape.rank(); + + std::unique_ptr<NodeData> output_data = nullptr; + + switch (input_data->dtype()) + { + // TODO Use type-generic implementation! + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + auto output_buf = make_buffer<float, LexicalLayout>(output_shape); + + for (IndexEnumerator e{output_shape}; e.valid(); e.advance()) + { + auto input_index = e.current(); + const auto &output_index = e.current(); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + if (tensor_broadcast->mapping()->defined(axis)) + { + input_index.at(axis) = 0; + } + } + + output_buf.at(output_index) = input_bufptr->at(input_index); + } + + output_data = make_data(output_buf); + break; + } + default: + throw std::runtime_error("Not yet supported"); + } + + assert(output_data != nullptr); + annot_data(tensor_broadcast, std::move(output_data)); + annot_domain(tensor_broadcast, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp new file mode 100644 index 000000000..e8347d737 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorBroadcast.test.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_TensorBroadcast, f32) +{ + // Create a sample graph w/ TensorBroadcast + auto g = loco::make_graph(); + auto pull = g->nodes()->create<loco::Pull>(); + pull->dtype(loco::DataType::FLOAT32); + pull->shape({1, 1}); + auto broadcast = g->nodes()->create<loco::TensorBroadcast>(); + broadcast->input(pull); + broadcast->mapping()->dim(0) = 2; + + // Make and assign data to pull node + auto pull_buf = make_buffer<float, LexicalLayout>(Shape{1, 1}); + pull_buf.at(Index{0, 0}) = -1.0f; + + auto pull_data = locomotiv::make_data(pull_buf); + locomotiv::annot_data(pull, std::move(pull_data)); + locomotiv::annot_domain(pull, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(broadcast); + + auto broadcast_data = locomotiv::annot_data(broadcast); + ASSERT_NE(broadcast_data, nullptr); + ASSERT_EQ(broadcast_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ((*(broadcast_data->shape())), (Shape{2, 1})); + ASSERT_FLOAT_EQ(broadcast_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f); + ASSERT_FLOAT_EQ(broadcast_data->as_f32_bufptr()->at(Index{1, 0}), -1.0f); + + 
ASSERT_EQ(locomotiv::annot_domain(broadcast), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp new file mode 100644 index 000000000..5097e55c6 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorConcat.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Shape; + +#include <cassert> +#include <stdexcept> + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorConcat *tensor_concat) +{ + auto lhs_data = annot_data(tensor_concat->lhs()); + auto rhs_data = annot_data(tensor_concat->rhs()); + auto axis = tensor_concat->axis(); + + validate(lhs_data && rhs_data, "Ingredient not ready"); + validate(lhs_data->dtype() == rhs_data->dtype(), "lhs and rhs of Concat should have same dtype"); + + validate(annot_domain(tensor_concat->lhs()) 
== loco::Domain::Tensor && + annot_domain(tensor_concat->rhs()) == loco::Domain::Tensor, + "Some ingredients of TensorConcat is not Tensor"); + + // Calculate output shape + Shape lhs_shape = *lhs_data->shape(); + Shape rhs_shape = *rhs_data->shape(); + Shape concat_shape; + + assert(lhs_shape.rank() == rhs_shape.rank()); + concat_shape.resize(lhs_shape.rank()); + for (uint32_t index = 0; index < lhs_shape.rank(); ++index) + { + if (index == axis) + concat_shape.dim(index) = lhs_shape.dim(index) + rhs_shape.dim(index); + else + { + assert(lhs_shape.dim(index) == rhs_shape.dim(index)); + concat_shape.dim(index) = lhs_shape.dim(index); + } + } + auto left_dim_size = lhs_shape.dim(axis); + + // Copy data from two inputs LHS and RHS to Concat + std::unique_ptr<NodeData> concat_data = nullptr; + switch (lhs_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto lhs_bufptr = lhs_data->as_f32_bufptr(); + auto rhs_bufptr = rhs_data->as_f32_bufptr(); + auto concat_buf = make_buffer<float, LexicalLayout>(concat_shape); + + for (IndexEnumerator e{concat_shape}; e.valid(); e.advance()) + { + const auto &e_index = e.current(); + + if (e_index.at(axis) < left_dim_size) + { + // Left index is same as output index + concat_buf.at(e_index) = lhs_bufptr->at(e_index); + } + else + { + // Adjust right index to valid range + Index r_index = e_index; + r_index.at(axis) -= left_dim_size; + concat_buf.at(e_index) = rhs_bufptr->at(r_index); + } + } + + concat_data = make_data(concat_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(concat_data != nullptr); + annot_data(tensor_concat, std::move(concat_data)); + annot_domain(tensor_concat, loco::Domain::Tensor); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorConcat.test.cpp b/compiler/locomotiv/src/Node/TensorConcat.test.cpp new file mode 100644 index 000000000..d71b51524 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorConcat.test.cpp @@ -0,0 +1,128 
@@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_TensorConcat, f32) +{ + // Make (pull, pull)-concat graph + auto g = loco::make_graph(); + auto pull_l = g->nodes()->create<loco::Pull>(); + pull_l->dtype(loco::DataType::FLOAT32); + pull_l->shape({1, 2}); + auto pull_r = g->nodes()->create<loco::Pull>(); + pull_r->dtype(loco::DataType::FLOAT32); + pull_r->shape({1, 2}); + auto tconcat = g->nodes()->create<loco::TensorConcat>(); + tconcat->lhs(pull_l); + tconcat->rhs(pull_r); + tconcat->axis(0); + + // Make and assign data to pull node + auto pull_l_buf = make_buffer<float, LexicalLayout>(Shape{1, 2}); + pull_l_buf.at(Index{0, 0}) = -1.0f; + pull_l_buf.at(Index{0, 1}) = -2.0f; + auto pull_r_buf = make_buffer<float, LexicalLayout>(Shape{1, 2}); + pull_r_buf.at(Index{0, 0}) = 3.0f; + pull_r_buf.at(Index{0, 1}) = 4.0f; + + auto pull_l_data = locomotiv::make_data(pull_l_buf); + locomotiv::annot_data(pull_l, 
std::move(pull_l_data)); + locomotiv::annot_domain(pull_l, loco::Domain::Tensor); + auto pull_r_data = locomotiv::make_data(pull_r_buf); + locomotiv::annot_data(pull_r, std::move(pull_r_data)); + locomotiv::annot_domain(pull_r, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(tconcat); + + auto concat_data = locomotiv::annot_data(tconcat); + ASSERT_NE(concat_data, nullptr); + ASSERT_EQ(concat_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ((*(concat_data->shape())), (Shape{2, 2})); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 1}), -2.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 0}), 3.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 1}), 4.0f); + + ASSERT_EQ(locomotiv::annot_domain(tconcat), loco::Domain::Tensor); +} + +TEST(NodeExecution_TensorConcat, f32_2) +{ + // Make (pull, pull)-concat graph + auto g = loco::make_graph(); + auto pull_l = g->nodes()->create<loco::Pull>(); + pull_l->dtype(loco::DataType::FLOAT32); + pull_l->shape({1, 2}); + auto pull_r = g->nodes()->create<loco::Pull>(); + pull_r->dtype(loco::DataType::FLOAT32); + pull_r->shape({3, 2}); + auto tconcat = g->nodes()->create<loco::TensorConcat>(); + tconcat->lhs(pull_l); + tconcat->rhs(pull_r); + tconcat->axis(0); + + // Make and assign data to pull node + auto pull_l_buf = make_buffer<float, LexicalLayout>(Shape{1, 2}); + pull_l_buf.at(Index{0, 0}) = -1.0f; + pull_l_buf.at(Index{0, 1}) = -2.0f; + auto pull_r_buf = make_buffer<float, LexicalLayout>(Shape{3, 2}); + pull_r_buf.at(Index{0, 0}) = 3.0f; + pull_r_buf.at(Index{0, 1}) = 4.0f; + pull_r_buf.at(Index{1, 0}) = -3.0f; + pull_r_buf.at(Index{1, 1}) = -4.0f; + pull_r_buf.at(Index{2, 0}) = 5.0f; + pull_r_buf.at(Index{2, 1}) = 6.0f; + + auto pull_l_data = locomotiv::make_data(pull_l_buf); + locomotiv::annot_data(pull_l, std::move(pull_l_data)); + locomotiv::annot_domain(pull_l, loco::Domain::Tensor); + auto 
pull_r_data = locomotiv::make_data(pull_r_buf); + locomotiv::annot_data(pull_r, std::move(pull_r_data)); + locomotiv::annot_domain(pull_r, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(tconcat); + + auto concat_data = locomotiv::annot_data(tconcat); + ASSERT_NE(concat_data, nullptr); + ASSERT_EQ(concat_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ((*(concat_data->shape())), (Shape{4, 2})); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 0}), -1.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{0, 1}), -2.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 0}), 3.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{1, 1}), 4.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{2, 0}), -3.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{2, 1}), -4.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{3, 0}), 5.0f); + ASSERT_FLOAT_EQ(concat_data->as_f32_bufptr()->at(Index{3, 1}), 6.0f); + + ASSERT_EQ(locomotiv::annot_domain(tconcat), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp new file mode 100644 index 000000000..989afaf94 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> + +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorConstantPad *pad) +{ + auto input_data = annot_data(pad->input()); + auto input_domain = annot_domain(pad->input()); + validate(input_data, "Input not ready"); + validate(input_domain == loco::Domain::Tensor, "Input domain of TensorConstantPad is not Tensor"); + + auto input_shape = input_data->shape(); + const uint32_t input_rank = input_shape->rank(); + + auto padding = pad->padding(); + validate(input_rank == padding->rank(), "input and padding should have same rank"); + + auto constant_node = pad->constant(); + auto constant_data = annot_data(constant_node); + validate(constant_data->dtype() == input_data->dtype(), "constant and input have same data type"); + validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1, + "constant should have one rank with one dimension at zero axis"); + + std::unique_ptr<NodeData> pad_data = nullptr; + Index base_index; + base_index.resize(input_rank); + + // Tensor is padded by relocating its base. 
+ // padded output index = input index + base index + for (uint32_t axis = 0; axis < padding->rank(); axis++) + { + base_index.at(axis) = padding->front(axis); + } + + // calculate output shape + Shape output_shape; + output_shape.resize(input_rank); + for (uint32_t i = 0; i < input_rank; i++) + { + output_shape.dim(i) = input_shape->dim(i) + padding->front(i) + padding->back(i); + } + + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_buf = input_data->as_f32_bufptr(); + auto constant_data_buf = constant_data->as_f32_bufptr(); + const auto constant_value = constant_data_buf->at(Index{0}); + + auto output_buf = make_buffer<float, LexicalLayout>(output_shape); + + for (IndexEnumerator ie{*input_shape}, oe{output_shape}; oe.valid(); oe.advance()) + { + auto input_index = ie.current(); + auto output_index = oe.current(); + + if ((input_index + base_index) == output_index) + { + output_buf.at(output_index) = input_buf->at(input_index); + ie.advance(); + } + else + { + output_buf.at(output_index) = constant_value; + } + } + + pad_data = make_data(output_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(pad_data != nullptr); + annot_data(pad, std::move(pad_data)); + annot_domain(pad, annot_domain(pad->input())); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp new file mode 100644 index 000000000..0f60c5f85 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorConstantPad.test.cpp @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Shape; + +TEST(NodeExecution_Pad, tensor_constant_pad_4_dim) +{ + auto g = loco::make_graph(); + + auto inputTensor = g->nodes()->create<loco::Pull>(); + inputTensor->dtype(loco::DataType::FLOAT32); + inputTensor->shape({1, 2, 2, 1}); + auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{1, 2, 2, 1}); + inputTensor_buf.at(Index{0, 0, 0, 0}) = 1.0f; + inputTensor_buf.at(Index{0, 0, 1, 0}) = 2.0f; + inputTensor_buf.at(Index{0, 1, 0, 0}) = 3.0f; + inputTensor_buf.at(Index{0, 1, 1, 0}) = 4.0f; + auto inputTensor_data = locomotiv::make_data(inputTensor_buf); + locomotiv::annot_data(inputTensor, std::move(inputTensor_data)); + locomotiv::annot_domain(inputTensor, loco::Domain::Tensor); + + auto constant = g->nodes()->create<loco::ConstGen>(); + constant->dtype(loco::DataType::FLOAT32); + constant->shape({1}); + auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1}); + constant_buf.at(Index{0}) = 0.0f; + auto constant_data = locomotiv::make_data(constant_buf); + locomotiv::annot_data(constant, std::move(constant_data)); + locomotiv::annot_domain(constant, loco::Domain::Tensor); + + auto pad = 
g->nodes()->create<loco::TensorConstantPad>(); + pad->input(inputTensor); + pad->constant(constant); + + auto padding = pad->padding(); + padding->rank(4); + padding->front(0) = 0; + padding->back(0) = 0; + padding->front(1) = 3; + padding->back(1) = 1; + padding->front(2) = 1; + padding->back(2) = 1; + padding->front(3) = 0; + padding->back(3) = 0; + + locomotiv::NodeExecution::get().run(pad); + + auto pad_data = locomotiv::annot_data(pad); + ASSERT_NE(pad_data, nullptr); + ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(pad_data->shape()), Shape({1, 6, 4, 1})); + + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 3, 1, 0}), 1.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 3, 2, 0}), 2.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 4, 1, 0}), 3.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 4, 2, 0}), 4.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0, 0, 0, 0}), 0.0f); + + ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor); +} + +TEST(NodeExecution_Pad, tensor_constant_pad_1_dim) +{ + auto g = loco::make_graph(); + + auto inputTensor = g->nodes()->create<loco::Pull>(); + inputTensor->dtype(loco::DataType::FLOAT32); + inputTensor->shape({3}); + auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{3}); + inputTensor_buf.at(Index{0}) = 1.0f; + inputTensor_buf.at(Index{1}) = 5.0f; + inputTensor_buf.at(Index{2}) = 3.0f; + auto inputTensor_data = locomotiv::make_data(inputTensor_buf); + locomotiv::annot_data(inputTensor, std::move(inputTensor_data)); + locomotiv::annot_domain(inputTensor, loco::Domain::Tensor); + + auto constant = g->nodes()->create<loco::ConstGen>(); + constant->dtype(loco::DataType::FLOAT32); + constant->shape({1}); + auto constant_buf = make_buffer<float, LexicalLayout>(Shape{1}); + constant_buf.at(Index{0}) = 0.0f; + auto constant_data = locomotiv::make_data(constant_buf); + locomotiv::annot_data(constant, std::move(constant_data)); + 
locomotiv::annot_domain(constant, loco::Domain::Tensor); + + auto pad = g->nodes()->create<loco::TensorConstantPad>(); + pad->input(inputTensor); + pad->constant(constant); + auto padding = pad->padding(); + padding->rank(1); + padding->front(0) = 2; + padding->back(0) = 1; + + locomotiv::NodeExecution::get().run(pad); + + auto pad_data = locomotiv::annot_data(pad); + ASSERT_NE(pad_data, nullptr); + ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(pad_data->shape()), Shape({6})); + + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{0}), 0.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1}), 0.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{2}), 1.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{3}), 5.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{4}), 3.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{5}), 0.0f); + + ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor); +} + +TEST(NodeExecution_Pad, tensor_constant_pad_6_dim) +{ + auto g = loco::make_graph(); + + auto inputTensor = g->nodes()->create<loco::Pull>(); + inputTensor->dtype(loco::DataType::FLOAT32); + inputTensor->shape({2, 1, 3, 2, 1, 2}); + auto inputTensor_buf = make_buffer<float, LexicalLayout>(Shape{2, 1, 3, 2, 1, 2}); + int a, b, c, d, e, f; + float dummy = 1.0f; + for (uint32_t a = 0; a < 2; a++) + { + for (uint32_t b = 0; b < 1; b++) + { + for (uint32_t c = 0; c < 3; c++) + { + for (uint32_t d = 0; d < 2; d++) + { + for (uint32_t e = 0; e < 1; e++) + { + for (uint32_t f = 0; f < 2; f++) + { + inputTensor_buf.at(Index{a, b, c, d, e, f}) = dummy++; + } + } + } + } + } + } + auto inputTensor_data = locomotiv::make_data(inputTensor_buf); + locomotiv::annot_data(inputTensor, std::move(inputTensor_data)); + locomotiv::annot_domain(inputTensor, loco::Domain::Tensor); + + auto constant = g->nodes()->create<loco::ConstGen>(); + constant->dtype(loco::DataType::FLOAT32); + constant->shape({1}); + auto constant_buf = 
make_buffer<float, LexicalLayout>(Shape{1}); + constant_buf.at(Index{0}) = 0.0f; + auto constant_data = locomotiv::make_data(constant_buf); + locomotiv::annot_data(constant, std::move(constant_data)); + locomotiv::annot_domain(constant, loco::Domain::Tensor); + + auto pad = g->nodes()->create<loco::TensorConstantPad>(); + pad->input(inputTensor); + pad->constant(constant); + auto padding = pad->padding(); + + padding->rank(6); + padding->front(0) = 1; + padding->back(0) = 1; + padding->front(1) = 0; + padding->back(1) = 0; + padding->front(2) = 1; + padding->back(2) = 2; + padding->front(3) = 2; + padding->back(3) = 1; + padding->front(4) = 0; + padding->back(4) = 0; + padding->front(5) = 1; + padding->back(5) = 2; + + locomotiv::NodeExecution::get().run(pad); + + auto pad_data = locomotiv::annot_data(pad); + ASSERT_NE(pad_data, nullptr); + ASSERT_EQ(pad_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(pad_data->shape()), Shape({4, 1, 6, 5, 1, 5})); + + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 1}), 1.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 2, 0, 2}), 2.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 1}), 3.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 1, 3, 0, 2}), 4.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 1}), 5.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 2, 0, 2}), 6.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 1}), 7.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 2, 3, 0, 2}), 8.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 1}), 9.0f); + ASSERT_FLOAT_EQ(pad_data->as_f32_bufptr()->at(Index{1, 0, 3, 2, 0, 2}), 10.0f); + + ASSERT_EQ(locomotiv::annot_domain(pad), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp new file mode 100644 index 
000000000..fae7a75c5 --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorReduce.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Buffer; + +#include <cassert> +#include <stdexcept> + +namespace +{ + +Index reduced_index(const Index &index, const loco::TensorAxisSet &axes) +{ + Index r_index; + + r_index.resize(index.rank()); + for (uint32_t i = 0; i < index.rank(); ++i) + r_index.at(i) = (axes.defined(i)) ? 0 : index.at(i); + + return r_index; +} + +Shape reduced_shape(const Shape &shape, const loco::TensorAxisSet &axes) +{ + Shape r_shape; + + r_shape.resize(shape.rank()); + for (uint32_t i = 0; i < shape.rank(); ++i) + r_shape.dim(i) = (axes.defined(i)) ? 
1 : shape.dim(i); + + return r_shape; +} + +} // namespace + +namespace +{ + +template <typename T, loco::ReduceFunc F> struct ReduceFunction +{ + static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes) + { + throw std::runtime_error("Not supported ReduceFunc type"); + } +}; + +template <typename T> struct ReduceFunction<T, loco::ReduceFunc::Mean> +{ + static void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorAxisSet &axes) + { + for (IndexEnumerator e{rhs.shape()}; e.valid(); e.advance()) + { + const auto &index = e.current(); + const auto r_index = reduced_index(index, axes); + + lhs.at(r_index) += rhs.at(index); + } + + uint32_t r_cnt = 1; + for (uint32_t i = 0; i < rhs.shape().rank(); ++i) + if (axes.defined(i)) + r_cnt *= rhs.shape().dim(i); + + for (IndexEnumerator e{lhs.shape()}; e.valid(); e.advance()) + { + const auto &index = e.current(); + lhs.at(index) /= static_cast<T>(r_cnt); + } + } +}; + +template <typename T> +void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node) +{ + switch (node.func()) + { + case loco::ReduceFunc::Mean: + ReduceFunction<T, loco::ReduceFunc::Mean>::apply(lhs, rhs, *node.axes()); + break; + + // TODO Support more ReduceFunc type + default: + break; + } +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TensorReduce *node) +{ + auto input_data = annot_data(node->input()); + auto input_shape = input_data->shape(); + + validate(input_data, "Input not ready"); + validate(annot_domain(node->input()) == loco::Domain::Tensor, + "Input domain of TensorReduce is not Tensor"); + + std::unique_ptr<NodeData> reduce_data = nullptr; + Shape r_shape = reduced_shape(*input_shape, *node->axes()); + switch (input_data->dtype()) + { + case loco::DataType::FLOAT32: + { + auto input_bufptr = input_data->as_f32_bufptr(); + auto reduce_buf = make_buffer<float, LexicalLayout>(r_shape); + + apply(reduce_buf, *input_bufptr, *node); + + reduce_data = 
make_data(reduce_buf); + break; + } + default: + throw std::runtime_error("NYI for this DataType"); + } + + assert(reduce_data != nullptr); + annot_data(node, std::move(reduce_data)); + annot_domain(node, annot_domain(node->input())); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TensorReduce.test.cpp b/compiler/locomotiv/src/Node/TensorReduce.test.cpp new file mode 100644 index 000000000..68398cacd --- /dev/null +++ b/compiler/locomotiv/src/Node/TensorReduce.test.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <gtest/gtest.h> + +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +TEST(NodeExecution_Fixed_Reduce_Mean, f32_0) +{ + // Make pull-TensorReduce(Mean) graph + auto g = loco::make_graph(); + auto pull_input = g->nodes()->create<loco::Pull>(); + pull_input->dtype(loco::DataType::FLOAT32); + pull_input->shape({1, 2, 2}); + auto reduce_node = g->nodes()->create<loco::TensorReduce>(); + reduce_node->input(pull_input); + reduce_node->axes()->insert(0); + reduce_node->axes()->insert(1); + reduce_node->func(loco::ReduceFunc::Mean); + + // Make and assign data to pull node + auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2}); + pull_input_buf.at(Index{0, 0, 0}) = 1.1f; + pull_input_buf.at(Index{0, 0, 1}) = 2.2f; + pull_input_buf.at(Index{0, 1, 0}) = 5.5f; + pull_input_buf.at(Index{0, 1, 1}) = 6.6f; + auto pull_input_data = locomotiv::make_data(pull_input_buf); + locomotiv::annot_data(pull_input, std::move(pull_input_data)); + locomotiv::annot_domain(pull_input, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(reduce_node); + + auto kShape = Shape{1, 1, 2}; + auto reduce_data = locomotiv::annot_data(reduce_node); + ASSERT_NE(reduce_data, nullptr); + ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(reduce_data->shape()), kShape); + ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.3f); + ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 1}), 4.4f); + + ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor); +} + +TEST(NodeExecution_Fixed_Reduce_Mean, f32_1) 
+{ + // Make pull-TensorReduce(Mean) graph + auto g = loco::make_graph(); + auto pull_input = g->nodes()->create<loco::Pull>(); + pull_input->dtype(loco::DataType::FLOAT32); + pull_input->shape({1, 2, 2}); + auto reduce_node = g->nodes()->create<loco::TensorReduce>(); + reduce_node->input(pull_input); + reduce_node->axes()->insert(1); + reduce_node->axes()->insert(2); + reduce_node->func(loco::ReduceFunc::Mean); + + // Make and assign data to pull node + auto pull_input_buf = make_buffer<float, LexicalLayout>({1, 2, 2}); + pull_input_buf.at(Index{0, 0, 0}) = 1.1f; + pull_input_buf.at(Index{0, 0, 1}) = 2.2f; + pull_input_buf.at(Index{0, 1, 0}) = 5.5f; + pull_input_buf.at(Index{0, 1, 1}) = 6.6f; + auto pull_input_data = locomotiv::make_data(pull_input_buf); + locomotiv::annot_data(pull_input, std::move(pull_input_data)); + locomotiv::annot_domain(pull_input, loco::Domain::Tensor); + + locomotiv::NodeExecution::get().run(reduce_node); + + auto kShape = Shape{1, 1, 1}; + auto reduce_data = locomotiv::annot_data(reduce_node); + ASSERT_NE(reduce_data, nullptr); + ASSERT_EQ(reduce_data->dtype(), loco::DataType::FLOAT32); + ASSERT_EQ(*(reduce_data->shape()), kShape); + ASSERT_FLOAT_EQ(reduce_data->as_f32_bufptr()->at(Index{0, 0, 0}), 3.85f); + + ASSERT_EQ(locomotiv::annot_domain(reduce_node), loco::Domain::Tensor); +} diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp new file mode 100644 index 000000000..3ea4f071d --- /dev/null +++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "NodeExecution.h" + +#include "NodeDataImpl.h" +#include "NodeDomain.h" +#include "Validation.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Index.h> +#include <nncc/core/ADT/tensor/IndexEnumerator.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> + +#include <cassert> +#include <stdexcept> + +namespace +{ + +using nncc::core::ADT::tensor::Buffer; +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::Index; +using nncc::core::ADT::tensor::IndexEnumerator; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; + +/** + * @brief Compute 1D output size for transposed convolution based on given 1D arguments. 
+ * + * @param whole_pad Sum of front and rear pad + */ +inline uint32_t compute_transposed_out_size(uint32_t input_size, uint32_t whole_pad, + uint32_t filter_size, uint32_t stride) +{ + return stride * (input_size - 1) + filter_size - whole_pad; +} + +/** + * @brief Calculates TransposedConv2D + * @note Both input_buf and filter_buf have NHWC format + */ +template <typename RET_T, typename IFM_T, typename FIL_T> +Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d, + const Buffer<IFM_T> *input_buf, const Buffer<FIL_T> *filter_buf) +{ + auto input_shape = input_buf->shape(); + auto filter_shape = filter_buf->shape(); + + locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4"); + locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4"); + locomotiv::validate(input_shape.dim(3) /* depth of input */ == + filter_shape.dim(3) /* depth of filter */, + "channel value mismatch"); + + const uint32_t input_height = input_shape.dim(1); + const uint32_t input_width = input_shape.dim(2); + + const uint32_t filter_height = filter_shape.dim(1); + const uint32_t filter_width = filter_shape.dim(2); + + const uint32_t stride_width = tr_conv2d->stride()->horizontal(); + const uint32_t stride_height = tr_conv2d->stride()->vertical(); + + const uint32_t pad_top = tr_conv2d->pad()->top(); + const uint32_t pad_bottom = tr_conv2d->pad()->bottom(); + + const uint32_t pad_left = tr_conv2d->pad()->left(); + const uint32_t pad_right = tr_conv2d->pad()->right(); + + // TODO Support dilations + + const uint32_t output_height = + compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height); + const uint32_t output_width = + compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width); + + const uint32_t batches = input_shape.dim(0); + const uint32_t input_depth = input_shape.dim(3); + const uint32_t output_depth = filter_shape.dim(0); // count of filter + + Shape output_shape{batches, 
output_height, output_width, output_depth}; + auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape); + + // initialize output + for (IndexEnumerator e{output_shape}; e.valid(); e.advance()) + { + const auto &index = e.current(); + output_buf.at(index) = static_cast<RET_T>(0); + } + + // Loop through input elements one at a time. + for (uint32_t batch = 0; batch < batches; ++batch) + { + for (uint32_t in_y = 0; in_y < input_height; ++in_y) + { + for (uint32_t in_x = 0; in_x < input_width; ++in_x) + { + for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel) + { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_left; + const int out_y_origin = (in_y * stride_height) - pad_top; + for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel) + { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && ((unsigned)out_x < output_width) && (out_y >= 0) && + ((unsigned)out_y < output_height)) + { + auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel})); + auto filter_value = + filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel})); + output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) += + input_value * filter_value; + } + } + } + } + } + } + } + } + return output_buf; +} + +} // namespace + +namespace locomotiv +{ + +void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) +{ + auto ifm_data = annot_data(tr_conv2d->ifm()); + auto ker_data = annot_data(tr_conv2d->ker()); + + validate(ifm_data, "Can't find input data of TransposedConv2D"); + validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4"); + + validate(ker_data, "Can't find kernel 
data of TransposedConv2D"); + validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4"); + + validate(annot_domain(tr_conv2d->ifm()) == loco::Domain::Feature, + "IFM of TransposedConv2D is not feature"); + validate(annot_domain(tr_conv2d->ker()) == loco::Domain::Filter, + "Kernel of TransposedConv2D is not filter"); + + std::unique_ptr<NodeData> tr_conv2d_result = nullptr; + + if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32) + { + auto ifm_buf = ifm_data->as_f32_bufptr(); + auto ker_buf = ker_data->as_f32_bufptr(); + + auto tr_conv2d_buf = calc_tr_conv2D<float, float, float>(tr_conv2d, ifm_buf, ker_buf); + + tr_conv2d_result = make_data(tr_conv2d_buf); + } + else + throw std::runtime_error("NYI for these DataTypes"); + + assert(tr_conv2d_result != nullptr); + + annot_data(tr_conv2d, std::move(tr_conv2d_result)); + annot_domain(tr_conv2d, loco::Domain::Feature); +} + +} // namespace locomotiv diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp new file mode 100644 index 000000000..bd955a06b --- /dev/null +++ b/compiler/locomotiv/src/Node/TransposedConv2D.test.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "NodeExecution.h" + +#include "locomotiv/NodeData.h" +#include "NodeDataImpl.h" +#include "NodeDomain.h" + +#include <nncc/core/ADT/tensor/Shape.h> +#include <nncc/core/ADT/tensor/Buffer.h> +#include <nncc/core/ADT/tensor/Overlay.h> +#include <nncc/core/ADT/tensor/LexicalLayout.h> +#include "nncc/core/ADT/tensor/IndexEnumerator.h" + +#include <gtest/gtest.h> + +namespace +{ +using nncc::core::ADT::tensor::Shape; +using nncc::core::ADT::tensor::LexicalLayout; +using nncc::core::ADT::tensor::make_buffer; +using nncc::core::ADT::tensor::make_overlay; + +void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape, + const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v, + const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0, + const uint32_t pad_left = 0, const uint32_t pad_right = 0) +{ + auto g = loco::make_graph(); + + // Fill output data of FeatureEncode from ifm + auto ifm_enc = g->nodes()->create<loco::FeatureEncode>(); + { + auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape); + auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ifm_enc_buf.at(ind) = ifm_overlay.at(ind); + } + + auto enc_data = locomotiv::make_data(ifm_enc_buf); + locomotiv::annot_data(ifm_enc, std::move(enc_data)); + locomotiv::annot_domain(ifm_enc, loco::Domain::Feature); + } + + // Fill output data of FilterEncode from ker + auto ker_enc = g->nodes()->create<loco::FilterEncode>(); + { + auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape); + auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ker_enc_buf.at(ind) = ker_overlay.at(ind); + } + + 
auto enc_data = locomotiv::make_data(ker_enc_buf); + locomotiv::annot_data(ker_enc, std::move(enc_data)); + locomotiv::annot_domain(ker_enc, loco::Domain::Filter); + } + + // build TransposedConv2D + auto tr_conv2d = g->nodes()->create<loco::TransposedConv2D>(); + tr_conv2d->ifm(ifm_enc); + tr_conv2d->ker(ker_enc); + tr_conv2d->stride()->vertical(stride_v); + tr_conv2d->stride()->horizontal(stride_h); + tr_conv2d->pad()->top(pad_top); + tr_conv2d->pad()->bottom(pad_bottom); + tr_conv2d->pad()->left(pad_left); + tr_conv2d->pad()->right(pad_right); + + // run interpreter + locomotiv::NodeExecution::get().run(tr_conv2d); + + // get result of calculation + auto conv2d_result = locomotiv::annot_data(tr_conv2d); + + // check the result + ASSERT_NE(conv2d_result, nullptr); + ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32); + ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape); + + auto ofm_overlay = + make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm)); + for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance()) + { + const auto &ind = e.current(); + ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind)); + } + + ASSERT_EQ(locomotiv::annot_domain(tr_conv2d), loco::Domain::Feature); +} + +} // namespace + +// clang-format off +/* +ifm = tf.constant(1.1, shape = [1, 2, 2, 4]) +ker = tf.constant(2.2, shape = [3, 3, 2, 4]) +tr_conv = tf.nn.conv2d_transpose(ifm, ker, output_shape = (1, 5, 5, 2), strides = [1, 2, 2, 1], padding = "VALID") + +with tf.Session() as session: + tr_conv_data = session.run(tr_conv) + */ +TEST(NodeExecution_TransposedConv2D, f32) +{ + using nncc::core::ADT::tensor::Shape; + + float ifm[1 * 2 * 2 * 4]; + for (int n = 0; n < 1 * 2 * 2 * 4; n++) + ifm[n] = 1.1; + + float ker[2 * 3 * 3 * 4]; // NHWC + for (int n = 0; n < 2 * 3 * 3 * 4; n++) + ker[n] = 2.2; + + float ofm[1 * 5 * 5 * 2] = {9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68, + 9.68, 9.68, 
9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68, + 19.36, 19.36, 19.36, 19.36, 38.72, 38.72, 19.36, 19.36, 19.36, 19.36, + 9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68, + 9.68, 9.68, 9.68, 9.68, 19.36, 19.36, 9.68, 9.68, 9.68, 9.68}; + + run_test(ifm, ker, ofm, + Shape{1, 2, 2, 4}, Shape{2, 3, 3, 4}, Shape{1, 5, 5, 2}, // shapes of ifm, ker, ofm + 2, 2 // stride + ); +} +// clang-format on |