Diffstat (limited to 'compiler/locomotiv/src/Node/TransposedConv2D.cpp')
-rw-r--r-- | compiler/locomotiv/src/Node/TransposedConv2D.cpp | 189 |
1 file changed, 189 insertions, 0 deletions
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
new file mode 100644
index 000000000..3ea4f071d
--- /dev/null
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Compute 1D output size for transposed convolution based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and rear pad
+ */
+inline uint32_t compute_transposed_out_size(uint32_t input_size, uint32_t whole_pad,
+                                            uint32_t filter_size, uint32_t stride)
+{
+  return stride * (input_size - 1) + filter_size - whole_pad;
+}
+
+/**
+ * @brief Calculates TransposedConv2D
+ * @note  Both input_buf and filter_buf have NHWC format
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
+                             const Buffer<IFM_T> *input_buf, const Buffer<FIL_T> *filter_buf)
+{
+  auto input_shape = input_buf->shape();
+  auto filter_shape = filter_buf->shape();
+
+  locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
+  locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
+                          filter_shape.dim(3) /* depth of filter */,
+                      "channel value mismatch");
+
+  const uint32_t input_height = input_shape.dim(1);
+  const uint32_t input_width = input_shape.dim(2);
+
+  const uint32_t filter_height = filter_shape.dim(1);
+  const uint32_t filter_width = filter_shape.dim(2);
+
+  const uint32_t stride_width = tr_conv2d->stride()->horizontal();
+  const uint32_t stride_height = tr_conv2d->stride()->vertical();
+
+  const uint32_t pad_top = tr_conv2d->pad()->top();
+  const uint32_t pad_bottom = tr_conv2d->pad()->bottom();
+
+  const uint32_t pad_left = tr_conv2d->pad()->left();
+  const uint32_t pad_right = tr_conv2d->pad()->right();
+
+  // TODO Support dilations
+
+  const uint32_t output_height = compute_transposed_out_size(input_height, pad_top + pad_bottom,
+                                                             filter_height, stride_height);
+  const uint32_t output_width = compute_transposed_out_size(input_width, pad_left + pad_right,
+                                                            filter_width, stride_width);
+
+  const uint32_t batches = input_shape.dim(0);
+  const uint32_t input_depth = input_shape.dim(3);
+  const uint32_t output_depth = filter_shape.dim(0); // count of filter
+
+  Shape output_shape{batches, output_height, output_width, output_depth};
+  auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
+  // initialize output
+  for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
+  {
+    const auto &index = e.current();
+    output_buf.at(index) = static_cast<RET_T>(0);
+  }
+
+  // Loop through input elements one at a time.
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t in_y = 0; in_y < input_height; ++in_y)
+    {
+      for (uint32_t in_x = 0; in_x < input_width; ++in_x)
+      {
+        for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+        {
+          // Loop through the output elements it will influence
+          const int out_x_origin = (in_x * stride_width) - pad_left;
+          const int out_y_origin = (in_y * stride_height) - pad_top;
+          for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+              {
+                // Compute output element location
+                const int out_x = out_x_origin + filter_x;
+                const int out_y = out_y_origin + filter_y;
+                // We cannot accumulate out of bounds
+                if ((out_x >= 0) && ((unsigned)out_x < output_width) && (out_y >= 0) &&
+                    ((unsigned)out_y < output_height))
+                {
+                  auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
+                  auto filter_value =
+                      filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+                  output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
+                      input_value * filter_value;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+{
+  auto ifm_data = annot_data(tr_conv2d->ifm());
+  auto ker_data = annot_data(tr_conv2d->ker());
+
+  validate(ifm_data, "Can't find input data of TransposedConv2D");
+  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(ker_data, "Can't find kernel data of TransposedConv2D");
+  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(tr_conv2d->ifm()) == loco::Domain::Feature,
+           "IFM of TransposedConv2D is not feature");
+  validate(annot_domain(tr_conv2d->ker()) == loco::Domain::Filter,
+           "Kernel of TransposedConv2D is not filter");
+
+  std::unique_ptr<NodeData> tr_conv2d_result = nullptr;
+
+  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+  {
+    auto ifm_buf = ifm_data->as_f32_bufptr();
+    auto ker_buf = ker_data->as_f32_bufptr();
+
+    auto tr_conv2d_buf = calc_tr_conv2D<float, float, float>(tr_conv2d, ifm_buf, ker_buf);
+
+    tr_conv2d_result = make_data(tr_conv2d_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(tr_conv2d_result != nullptr);
+
+  annot_data(tr_conv2d, std::move(tr_conv2d_result));
+  annot_domain(tr_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
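Note for reviewers: the kernel in this patch scatters rather than gathers. Each input element is multiplied by every filter tap and accumulated into the stride-spaced output window it influences, with out-of-bounds writes (caused by padding) skipped by the bounds check. The output extent comes from stride * (input_size - 1) + filter_size - whole_pad; for input 3, filter 3, stride 2, no padding, that gives 2 * 2 + 3 = 7. The standalone 1D sketch below (not part of the patch; out_size and the variable names are illustrative) demonstrates both the size formula and the accumulation pattern using plain std::vector instead of the nncc Buffer types:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Same rule as compute_transposed_out_size() in the patch.
    inline uint32_t out_size(uint32_t in, uint32_t whole_pad, uint32_t filter, uint32_t stride)
    {
      return stride * (in - 1) + filter - whole_pad;
    }

    int main()
    {
      const uint32_t stride = 2, pad_front = 0, pad_rear = 0;
      std::vector<float> input{1.f, 2.f, 3.f};  // input_size = 3
      std::vector<float> filter{1.f, 1.f, 1.f}; // filter_size = 3

      const uint32_t out = out_size(input.size(), pad_front + pad_rear, filter.size(), stride);
      std::vector<float> output(out, 0.f); // out = 2 * (3 - 1) + 3 - 0 = 7

      // Scatter: each input element accumulates into the window it influences,
      // mirroring the in_y/in_x -> out_y/out_x loops of calc_tr_conv2D().
      for (uint32_t i = 0; i < input.size(); ++i)
      {
        const int origin = static_cast<int>(i * stride) - static_cast<int>(pad_front);
        for (uint32_t f = 0; f < filter.size(); ++f)
        {
          const int o = origin + static_cast<int>(f);
          if (o >= 0 && static_cast<uint32_t>(o) < out) // skip out-of-bounds writes
            output[o] += input[i] * filter[f];
        }
      }

      for (float v : output)
        std::printf("%.0f ", v); // prints: 1 1 3 2 5 3 3
      std::printf("\n");
      return 0;
    }

The overlap at positions 2 and 4 (3 = 1 + 2 and 5 = 2 + 3) is where consecutive stride-2 windows meet; the same overlap-and-add behavior is what the 2D kernel's += accumulation produces, which is why the output buffer must be zero-initialized before the main loop.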