inference-engine/thirdparty/clDNN/src/convolution.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "convolution_inst.h"
#include "primitive_type_base.h"
#include "sliding_window_utils.h"
#include "error_handler.h"
#include "json_object.h"

namespace cldnn
{
primitive_type_id convolution_type_id()
{
    static primitive_type_base<convolution> instance;
    return &instance;
}

layout convolution_inst::calc_output_layout(convolution_node const& node)
{
    auto desc = node.get_primitive();

    auto input_layout = node.input().get_output_layout();
    auto weights_layout = node.weights(0).get_output_layout(); //weights are stored after inputs

    auto input_offset = desc->input_offset;
    auto stride = desc->stride;
    auto dilation = desc->dilation;
    auto split = desc->weights.size();

    // compute how many outputs in rows and columns will be generate by filter. 
    // outp <= (input_size - (2*input_offset) - kernel_size)/ stride 
    auto filter_size = weights_layout.size;

    // TODO: Consider moving general parameter verification to arguments constructor.
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial X", stride.spatial[0], "value", 0, "Stride spatial X must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial Y", stride.spatial[1], "value", 0, "Stride spatial Y must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial X", dilation.spatial[0], "value", 0, "Dilatation patial X must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial Y", dilation.spatial[1], "value", 0, "Dilatation spatial Y must be positive (>= 1)");
    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial X", 2 * input_offset.spatial[0], "input layout spatial X", input_layout.size.spatial[0], "There is no input data to process");
    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial Y", 2 * input_offset.spatial[1], "input layout spatial Y", input_layout.size.spatial[1], "There is no input data to process");
    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset feature", input_offset.feature[0], "", 0, "Input offset in feature is not supported");
    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset batch", input_offset.batch[0], "", 0, "Input offset in batch is not supported");

    // TODO: FCN and SSD used offset larger than convolution size. does it make sense to support it? do we support it on the ref kernels?
//     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial X", -input_offset.spatial[0], "input window size spatial X", filter_size.spatial[0], "First convolution is outside of image. please reduce input offset X");
//     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial Y", -input_offset.spatial[1], "input window size spatial Y", filter_size.spatial[1], "First convolution is outside of image. please reduce input offset Y");

    if (input_layout.format == format::winograd_2x3_s1_weights || input_layout.format == format::winograd_2x3_s1_fused_weights ||
        input_layout.format == format::winograd_6x3_s1_fused_weights || input_layout.format == format::image_2d_weights_winograd_6x3_s1_fbxyb || input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
        CLDNN_ERROR_MESSAGE(node.id(), "Input for convolution should not be in windograd weights format - it is reserved for weights only");

    if (input_layout.format == format::winograd_2x3_s1_data)
    {
        CLDNN_ERROR_NOT_EQUAL(node.id(), "convolution split", split, "expected value", 1, "Convolution with winograd input only supports split == 1");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial X", stride.spatial[0], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial Y", stride.spatial[1], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial X", dilation.spatial[0], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial Y", dilation.spatial[1], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
        if (input_layout.size.feature[0] % 32 != 0)
            CLDNN_ERROR_MESSAGE(node.id(), "Input for winograd 2x3 convolution should have features count divisable by 32");
        if (weights_layout.size.batch[0] % 32 != 0)
            CLDNN_ERROR_MESSAGE(node.id(), "Number of filters (OFM) for winograd 2x3 convolution should be divisable by 32");

        if (node.get_primitive()->with_activation)
            CLDNN_ERROR_MESSAGE(node.id(), "Winograd 2x3 convolution should not have activation fused - activation should be performed at transformation from winograd domain stage");

        CLDNN_ERROR_LESS_THAN(node.id(), "input width", input_layout.size.spatial[0], "filter width", 3, "Convolution input is smaller than weights");
        CLDNN_ERROR_LESS_THAN(node.id(), "input height", input_layout.size.spatial[1], "filter height", 3, "Convolution input is smaller than weights");

        constexpr tensor::value_type filter_height = 3; //by definition of format::winograd_2x3_s1_data (our assumption)
        constexpr tensor::value_type winograd_filter_height = filter_height; //for this format, winograd filter is considered to be a set of 1d filters so its height should remain the same as original filter's

        return layout{ input_layout.data_type, input_layout.format, tensor{ input_layout.size.batch[0], weights_layout.size.batch[0], input_layout.size.spatial[0], input_layout.size.spatial[1] - winograd_filter_height + 1 }, input_layout.data_padding };
    }

    // get output feature map from weights. It should be the same as number of biases. Will be verifed in convolution::create()
    auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(split);

    if (desc->with_output_size)
    {
        CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial X", desc->output_size.spatial[0], "value", 0, "must be positive(>= 1)");
        CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial Y", desc->output_size.spatial[1], "value", 0, "must be positive(>= 1)");

        tensor output_size(input_layout.size.batch[0], number_of_features,
                           desc->output_size.spatial[0], desc->output_size.spatial[1]);
        return { input_layout.data_type, input_layout.format, output_size };
    }

    auto output_range = calc_sliding_window_output_range<swor_mode::all>(
        input_layout.size, filter_size, input_offset, stride, dilation, true, 1);

    tensor output_size(input_layout.size.batch[0], number_of_features,
                       output_range.spatial[0], output_range.spatial[1]);
    return { input_layout.data_type, input_layout.format, output_size };
}

std::string convolution_inst::to_string(convolution_node const& node)
{    
    auto desc       = node.get_primitive();
    auto strd       = desc->stride;
    auto split      = node.get_split();
    auto dilation   = desc->dilation;
    auto node_info  = node.desc_to_json();
    auto activation = desc->with_activation ? " true" : "false";

    std::stringstream primitive_description;

    json_composite conv_info;
    conv_info.add("stride", strd.to_string());
    conv_info.add("input offset", desc->input_offset.to_string());
    conv_info.add("split", split);
    conv_info.add("dilation", dilation.to_string());
    conv_info.add("with activation", activation);
    conv_info.add("slope", desc->activation_negative_slope);
    if (desc->with_output_size)
    {
        json_composite ud_out_size_info;
        ud_out_size_info.add("size", desc->output_size.to_string());
        conv_info.add("with user defined output size", ud_out_size_info);
    }

    node_info.add("convolution info", conv_info);
    node_info.dump(primitive_description);

    return primitive_description.str();
}

convolution_inst::typed_primitive_inst(network_impl& network, convolution_node const& node)
    : parent(network, node)
{
    auto stride = argument.stride;

    auto input_inst = node.input().get_output_layout();
    auto output_inst = node.get_output_layout();
    auto output_size = output_inst.size;

    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismtach");
    CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismtach");

    auto split = node.get_split();
    for (decltype(split) j = 0; j < split; j++)
    {
        auto filter_inst = node.weights(j).get_output_layout(); //convolution filter
        if (bias_term())
        {
            auto bias_inst = node.bias(j).get_output_layout();
            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias batch[0]", bias_inst.size.batch[0], "expected size of batch", 1, "Biases isn't 1D vector.");
            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias feature[0]", bias_inst.size.feature[0], "expected size of feature", 1, "Biases isn't 1D vector.");
            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[1]", bias_inst.size.spatial[1], "expected size of spatial[1]", 1, "Biases isn't 1D vector.");
          
            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismtach");
        }

        auto input_offset = argument.input_offset;

        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismtach");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Convolution padding mode", node.get_output_layout().data_padding.filling_value(), "padding value", 0.0f, "Unknown padding mode.");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismtach");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output feature size", output_size.feature.size(), "expected feature size", 1, "Only one-dimensional features are supported");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output batch size", output_size.batch.size(), "expected output size", 1, "Only one-dimensional batch size are supported");
        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights spatial size", filter_inst.size.spatial.size(), "expected weights spatial size", 2, "Weights have to have 2 dimensions in spatial domain.");
        CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismtach");
    }
}
}