summary refs log tree commit diff
path: root/model-optimizer/mo/ops/convolution.py
diff options
context:
space:
mode:
Diffstat (limited to 'model-optimizer/mo/ops/convolution.py')
-rw-r--r--model-optimizer/mo/ops/convolution.py232
1 files changed, 215 insertions, 17 deletions
diff --git a/model-optimizer/mo/ops/convolution.py b/model-optimizer/mo/ops/convolution.py
index b206c4fe8..fc08c4b4b 100644
--- a/model-optimizer/mo/ops/convolution.py
+++ b/model-optimizer/mo/ops/convolution.py
@@ -14,11 +14,18 @@
limitations under the License.
"""
+import logging as log
+
import networkx as nx
+import numpy as np
-from mo.front.common.partial_infer.convolution import caffe_conv2d_infer
+from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \
+ tf_window_op_pad_infer
from mo.front.extractor import spatial_getter
-from mo.ops.op import Op
+from mo.front.onnx.extractors.utils import get_backend_pad
+from mo.graph.graph import Node
+from mo.ops.op import Op, PermuteAttrs
+from mo.utils.error import Error
class Convolution(Op):
@@ -29,27 +36,218 @@ class Convolution(Op):
'kind': 'op',
'type': __class__.op,
'op': __class__.op,
- 'infer': caffe_conv2d_infer
+ 'infer': __class__.infer,
}, attrs)
- def supported_attrs(self):
+ def backend_attrs(self):
return [
- 'kernel',
- 'pad',
- 'stride',
- 'output',
- 'dilation'
+ 'auto_pad',
+ 'group',
+ ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))),
+ ('dilations', lambda node: ','.join(map(str, node['dilation'][node.spatial_dims]))),
+ ('kernel', lambda node: ','.join(map(str, node['kernel_spatial']))),
+
+ ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))),
+ ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))),
+ 'output'
]
- def backend_attrs(self):
+ def backend_attrs_v2(self):
return [
- spatial_getter('stride-x', 'stride', 0),
- spatial_getter('stride-y', 'stride', 1),
- spatial_getter('kernel-x', 'kernel', 0),
- spatial_getter('kernel-y', 'kernel', 1),
+ spatial_getter('stride-x', 'stride', 1),
+ spatial_getter('stride-y', 'stride', 0),
+
+ ('kernel-x', lambda node: node.kernel_spatial[1]),
+ ('kernel-y', lambda node: node.kernel_spatial[0]),
+
spatial_getter('dilation-x', 'dilation', 0),
spatial_getter('dilation-y', 'dilation', 1),
- spatial_getter('pad-x', 'pad', 0, lambda x: x[0]),
- spatial_getter('pad-y', 'pad', 1, lambda x: x[0]),
- 'output'
+ spatial_getter('pad-x', 'pad', 1, lambda x: x[0]),
+ spatial_getter('pad-y', 'pad', 0, lambda x: x[0]),
+ spatial_getter('pad-r', 'pad', 1, lambda x: x[1]),
+ spatial_getter('pad-b', 'pad', 0, lambda x: x[1]),
+
+ 'auto_pad',
+ 'output',
+ 'group',
]
+
+
+ @staticmethod
+ def calc_convolution(input_spatial_shape, stride_spatial_shape, pad_spatial_shape, kernel_extent):
+ ''' Calculates output shape for Convolution.
+ Verified to be applicable for both Caffe and ONNX.
+ '''
+ spatial_val_wo_stride = input_spatial_shape + pad_spatial_shape - kernel_extent
+ float_spatial_val_wo_stride = float_array(spatial_val_wo_stride)
+ return float_spatial_val_wo_stride / stride_spatial_shape + 1
+
+ @staticmethod
+ def calc_deconvolution(node, input_spatial_shape, pad_spatial_shape, kernel_extent, output_padding=None):
+ ''' Calculates output shape for Deconvolution.
+ Verified to be applicable for both Caffe and ONNX with explicitly defined pads.
+ If pads are not specified for ONNX operator, this function is not applicable.
+ '''
+ shape = node.stride[node.spatial_dims] * (input_spatial_shape - 1) + kernel_extent - pad_spatial_shape
+ if output_padding is not None:
+ shape += output_padding
+ return shape
+
+ @staticmethod
+ def infer(node: Node):
+ """
+ Infers shape of convolution node as it is done in ONNX.
+ It is very similar to one that Caffe does, but slightly different.
+ We made a complete fork of this function because they are supposed to be
+ supported differently by different people.
+ Args:
+ node: graph convolution node
+ """
+ input_shape = node.in_node(0).shape
+ if input_shape is None:
+ return
+
+ # bias_term cannot be deduced earlier for frameworks that represent
+ # convolution weights/biases as regular inputs; so the number of inputs
+ # is checked here and the correct value for bias_term is restored to
+ # keep the rest of the code unchanged. It will be used after we merge
+ # several infer functions for convolution in different FWs to a single one.
+ if not node.has_valid('bias_term'):
+ node['bias_term'] = len(node.in_nodes()) == 3
+
+ # In case of caffe we have to calculate input index for weights because
+ # caffe convolution can have more than one input
+ weights_index = len(node.in_nodes()) - 2
+ if not node.bias_term:
+ weights_index = len(node.in_nodes()) - 1
+
+ # Reshape weights kernel to original shape
+ # In case of the caffe or MXNet framework, values for weights have no structured shape like OIHW
+ # so we have to reshape weights to normal shape
+ # For this case, Convolution node should have attribute reshape_kernel = True
+ if node.has_valid('reshape_kernel') and node.reshape_kernel:
+ if not (node.has_valid('output') and node.has_valid('channel_dims') and node.has_valid(
+ 'group') and node.has_valid('kernel_spatial')):
+ log.error('Can\'t reshape kernel due to not all required attrs was set to {} node'.format(node.id))
+ return
+ # layout for Convolution weights is OIHW
+ kernel_shape = np.array([node.output, input_shape[node.channel_dims].item() / node.group,
+ *[node.kernel_spatial[i] for i in range(len(node.kernel_spatial))]], dtype=np.int64)
+ if node.type == 'Deconvolution': # layout for Deconvolution weights is IOHW
+ kernel_shape[[0, 1]] = kernel_shape[[1, 0]]
+
+ node.in_node(weights_index).shape = np.array(kernel_shape)
+ node.in_node(weights_index).value = np.reshape(node.in_node(weights_index).value, kernel_shape)
+ node.reshape_kernel = False
+
+ # Pass weights shape to node attribute kernel_shape
+ kernel_shape = node.in_node(weights_index).shape
+ node['kernel_shape'] = kernel_shape
+ # Calculate kernel_spatial_idx and spatial_dims if it is not specified
+ # It is necessary for ONNX because a convolution can be 1D/2D/3D
+ if not node.has_valid('kernel_spatial_idx'):
+ node['kernel_spatial_idx'] = np.delete([x for x in range(len(kernel_shape))], (node.input_feature_channel, node.output_feature_channel))
+
+ if not node.has_valid('spatial_dims'):
+ node['spatial_dims'] = np.delete([x for x in range(len(input_shape))], (node.channel_dims[0], node.batch_dims[0]))
+
+ node['kernel_spatial'] = kernel_shape[node.kernel_spatial_idx]
+
+ if not node.has_valid('output'):
+ # restore the number of output feature maps from the second argument that is weights
+ if node.type in ['Convolution', 'Deconvolution']:
+ node['output'] = kernel_shape[node.output_feature_channel]
+ else:
+ raise Error(
+ 'Convolution infer function was called for a node {} with unsupported type {}',
+ node.soft_get('name'),
+ node.type
+ )
+
+ # Set default values for dilation, strides and pads if not set
+ if not node.has_valid('dilation'):
+ node['dilation'] = np.full([len(input_shape)], 1, dtype=np.int64)
+ if not node.has_valid('stride'):
+ node['stride'] = np.full([len(input_shape)], 1, dtype=np.int64)
+ if not node.has_valid('pad'):
+ node['pad'] = np.array([[0, 0]] * len(input_shape), dtype=np.int64)
+ node['pad_spatial_shape'] = node.pad[node.spatial_dims]
+
+ input_spatial_shape = input_shape[node.spatial_dims]
+ stride_spatial_shape = node.stride[node.spatial_dims]
+
+ kernel_extent = node.dilation[node.spatial_dims] * (node.kernel_spatial - 1) + 1
+ # TensorFlow always has auto_pad attribute that can be either valid or same_upper
+ # In ONNX auto_pad attribute is deprecated but appears in some models (could be valid, same_upper or same_lower)
+ # Caffe does not use the auto_pad attribute
+ if node.has_valid('auto_pad') and not node.has_valid('output_spatial_shape'):
+ node['pad_spatial_shape'], node['output_spatial_shape'] = tf_window_op_pad_infer(input_spatial_shape,
+ kernel_extent,
+ stride_spatial_shape,
+ node.auto_pad)
+ pad = np.zeros((len(input_shape), 2), dtype=np.int64)
+ pad[node.spatial_dims] = node.pad_spatial_shape
+ node.pad = pad
+ else:
+ pad_spatial_shape = np.add.reduce(node.pad_spatial_shape, axis=1)
+ if node.type == 'Convolution':
+ float_spatial = Convolution.calc_convolution(input_spatial_shape, stride_spatial_shape,
+ pad_spatial_shape,
+ kernel_extent)
+ node['output_spatial_shape'] = int64_array(float_spatial)
+ elif node.type == 'Deconvolution':
+ # In case of given output_spatial_shape we calculate pads spatial
+ if node.has_valid('output_spatial_shape'):
+ if node.has_valid('get_pad'):
+ node['pad'] = node.get_pad(node, input_shape, kernel_shape)
+ else:
+ log.debug('Can\'t calculate paddings due to missing lambda get_pad in {} node'.format(node.id))
+ return
+ else:
+ output_padding = node.output_padding[node.spatial_dims] if node.has_valid('output_padding') else None
+ float_spatial = Convolution.calc_deconvolution(node, input_spatial_shape, pad_spatial_shape,
+ kernel_extent,
+ output_padding)
+ node['output_spatial_shape'] = int64_array(float_spatial)
+ else:
+ return
+
+ # For cases when group attribute wasn't set in extractor we should specify get_group attribute
+ # this attribute should store lambda node: ... (check tf convolution extractor)
+ if node.has_valid('get_group'):
+ node['group'] = node.get_group(node)
+ output_shape = np.full_like(input_shape, -1, dtype=np.int64)
+ output_shape[node.batch_dims] = input_shape[node.batch_dims]
+ output_shape[node.spatial_dims] = node.output_spatial_shape
+
+ # For cases when output attribute wasn't set in extractor we should specify get_output_feature_dim attribute
+ # this attribute should store lambda node: ... (check tf convolution extractor)
+ if node.has_valid('get_output_feature_dim'):
+ node['output'] = node.get_output_feature_dim(node)
+ output_shape[node.channel_dims] = node.output
+ node['output_shape'] = output_shape
+
+ for n in node.out_nodes():
+ node.out_node(n).shape = output_shape
+
+ mark_input_bins(node)
+ assign_dims_to_weights(node.in_node(weights_index), node.kernel_spatial_idx, node.input_feature_channel,
+ node.output_feature_channel, len(kernel_shape))
+
+ PermuteAttrs.create_permute_attrs(node, attrs=[('pad', 'input:0'),
+ ('stride', 'input:0'),
+ ('dilation', 'input:0'),
+ ('output_shape', 'input:0'),
+ ('batch_dims', 'input:0'),
+ ('channel_dims', 'input:0'),
+ ('spatial_dims', 'input:0'),
+
+ ('kernel_shape', 'input:{}'.format(weights_index)),
+ ('kernel_spatial_idx', 'input:{}'.format(weights_index)),
+ ('input_feature_channel', 'input:{}'.format(weights_index)),
+ ('output_feature_channel', 'input:{}'.format(weights_index)),
+ ])
+
+ PermuteAttrs.set_permutation(node.in_node(weights_index), node,
+ node.get_weights_permute if node.has_valid('get_weights_permute') else None,
+ skip_if_exists=True)