diff options
Diffstat (limited to 'model-optimizer/mo/ops/convolution.py')
-rw-r--r-- | model-optimizer/mo/ops/convolution.py | 232 |
1 files changed, 215 insertions, 17 deletions
diff --git a/model-optimizer/mo/ops/convolution.py b/model-optimizer/mo/ops/convolution.py index b206c4fe8..fc08c4b4b 100644 --- a/model-optimizer/mo/ops/convolution.py +++ b/model-optimizer/mo/ops/convolution.py @@ -14,11 +14,18 @@ limitations under the License. """ +import logging as log + import networkx as nx +import numpy as np -from mo.front.common.partial_infer.convolution import caffe_conv2d_infer +from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \ + tf_window_op_pad_infer from mo.front.extractor import spatial_getter -from mo.ops.op import Op +from mo.front.onnx.extractors.utils import get_backend_pad +from mo.graph.graph import Node +from mo.ops.op import Op, PermuteAttrs +from mo.utils.error import Error class Convolution(Op): @@ -29,27 +36,218 @@ class Convolution(Op): 'kind': 'op', 'type': __class__.op, 'op': __class__.op, - 'infer': caffe_conv2d_infer + 'infer': __class__.infer, }, attrs) - def supported_attrs(self): + def backend_attrs(self): return [ - 'kernel', - 'pad', - 'stride', - 'output', - 'dilation' + 'auto_pad', + 'group', + ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))), + ('dilations', lambda node: ','.join(map(str, node['dilation'][node.spatial_dims]))), + ('kernel', lambda node: ','.join(map(str, node['kernel_spatial']))), + + ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))), + ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))), + 'output' ] - def backend_attrs(self): + def backend_attrs_v2(self): return [ - spatial_getter('stride-x', 'stride', 0), - spatial_getter('stride-y', 'stride', 1), - spatial_getter('kernel-x', 'kernel', 0), - spatial_getter('kernel-y', 'kernel', 1), + spatial_getter('stride-x', 'stride', 1), + spatial_getter('stride-y', 'stride', 0), + + ('kernel-x', lambda node: node.kernel_spatial[1]), + ('kernel-y', lambda node: node.kernel_spatial[0]), + spatial_getter('dilation-x', 'dilation', 0), spatial_getter('dilation-y', 'dilation', 1), - spatial_getter('pad-x', 'pad', 0, lambda x: x[0]), - spatial_getter('pad-y', 'pad', 1, lambda x: x[0]), - 'output' + spatial_getter('pad-x', 'pad', 1, lambda x: x[0]), + spatial_getter('pad-y', 'pad', 0, lambda x: x[0]), + spatial_getter('pad-r', 'pad', 1, lambda x: x[1]), + spatial_getter('pad-b', 'pad', 0, lambda x: x[1]), + + 'auto_pad', + 'output', + 'group', ] + + + @staticmethod + def calc_convolution(input_spatial_shape, stride_spatial_shape, pad_spatial_shape, kernel_extent): + ''' Calculates output shape for Convolution. + Verified to be applicable for both Caffe and ONNX. + ''' + spatial_val_wo_stride = input_spatial_shape + pad_spatial_shape - kernel_extent + float_spatial_val_wo_stride = float_array(spatial_val_wo_stride) + return float_spatial_val_wo_stride / stride_spatial_shape + 1 + + @staticmethod + def calc_deconvolution(node, input_spatial_shape, pad_spatial_shape, kernel_extent, output_padding=None): + ''' Calculates output shape for Deconvolution. + Verified to be applicable for both Caffe and ONNX with explicitly defined pads. + If pads are not specified for ONNX operator, this function is not applicable. + ''' + shape = node.stride[node.spatial_dims] * (input_spatial_shape - 1) + kernel_extent - pad_spatial_shape + if output_padding is not None: + shape += output_padding + return shape + + @staticmethod + def infer(node: Node): + """ + Infers shape of convolution node as it is done in ONNX. + It is very similar to one that Caffe does, but slightly different. + We made a complete fork of this function because they are supposed to be + supported differently by different people. + Args: + node: graph convolution node + """ + input_shape = node.in_node(0).shape + if input_shape is None: + return + + # bias_term cannot be deduced earlier for frameworks that represent + # convolution weights/biases as regular inputs; so the number of inputs + # is being checked here and restore correct value for bias_term to + # have the rest of the code unchanged. It will be used after we merge + # several infer functions for convolution in different FWs to a single one. + if not node.has_valid('bias_term'): + node['bias_term'] = len(node.in_nodes()) == 3 + + # In case of caffe we have to calculate input index for weights because + # caffe convolution can be with more than one input + weights_index = len(node.in_nodes()) - 2 + if not node.bias_term: + weights_index = len(node.in_nodes()) - 1 + + # Reshape weights kernel to original shape + # In case of caffe ot MXNet framework, values for weights has no structed shape like OIHW + # so we have to reshape weights to normal shape + # For this case, Convolution node should have attribute reshape_kernel = True + if node.has_valid('reshape_kernel') and node.reshape_kernel: + if not (node.has_valid('output') and node.has_valid('channel_dims') and node.has_valid( + 'group') and node.has_valid('kernel_spatial')): + log.error('Can\'t reshape kernel due to not all required attrs was set to {} node'.format(node.id)) + return + # layout for Convolution weights is OIHW + kernel_shape = np.array([node.output, input_shape[node.channel_dims].item() / node.group, + *[node.kernel_spatial[i] for i in range(len(node.kernel_spatial))]], dtype=np.int64) + if node.type == 'Deconvolution': # layout for Deconvolution weights is IOHW + kernel_shape[[0, 1]] = kernel_shape[[1, 0]] + + node.in_node(weights_index).shape = np.array(kernel_shape) + node.in_node(weights_index).value = np.reshape(node.in_node(weights_index).value, kernel_shape) + node.reshape_kernel = False + + # Pass weights shape to node attribute kernel_shape + kernel_shape = node.in_node(weights_index).shape + node['kernel_shape'] = kernel_shape + # Calculate kernel_spatial_idx and spatial_dims if it is not specified + # It is necessary for ONNX dut to convolution can be 1D/2D/3D + if not node.has_valid('kernel_spatial_idx'): + node['kernel_spatial_idx'] = np.delete([x for x in range(len(kernel_shape))], (node.input_feature_channel, node.output_feature_channel)) + + if not node.has_valid('spatial_dims'): + node['spatial_dims'] = np.delete([x for x in range(len(input_shape))], (node.channel_dims[0], node.batch_dims[0])) + + node['kernel_spatial'] = kernel_shape[node.kernel_spatial_idx] + + if not node.has_valid('output'): + # restore the number of output feature maps from the scond argument that is weights + if node.type in ['Convolution', 'Deconvolution']: + node['output'] = kernel_shape[node.output_feature_channel] + else: + raise Error( + 'Convolution infer function was called for a node {} with unsupported type {}', + node.soft_get('name'), + node.type + ) + + # Set default values for dilation, strides and pads if not set + if not node.has_valid('dilation'): + node['dilation'] = np.full([len(input_shape)], 1, dtype=np.int64) + if not node.has_valid('stride'): + node['stride'] = np.full([len(input_shape)], 1, dtype=np.int64) + if not node.has_valid('pad'): + node['pad'] = np.array([[0, 0]] * len(input_shape), dtype=np.int64) + node['pad_spatial_shape'] = node.pad[node.spatial_dims] + + input_spatial_shape = input_shape[node.spatial_dims] + stride_spatial_shape = node.stride[node.spatial_dims] + + kernel_extent = node.dilation[node.spatial_dims] * (node.kernel_spatial - 1) + 1 + # TensorFlow always has auto_pad attribute that can be either valid or same_upper + # In ONNX auto_pad attribute is deprecated but appears in some models (could be valid, same_upper or same_lower) + # Caffe do not use auto_pad attribute + if node.has_valid('auto_pad') and not node.has_valid('output_spatial_shape'): + node['pad_spatial_shape'], node['output_spatial_shape'] = tf_window_op_pad_infer(input_spatial_shape, + kernel_extent, + stride_spatial_shape, + node.auto_pad) + pad = np.zeros((len(input_shape), 2), dtype=np.int64) + pad[node.spatial_dims] = node.pad_spatial_shape + node.pad = pad + else: + pad_spatial_shape = np.add.reduce(node.pad_spatial_shape, axis=1) + if node.type == 'Convolution': + float_spatial = Convolution.calc_convolution(input_spatial_shape, stride_spatial_shape, + pad_spatial_shape, + kernel_extent) + node['output_spatial_shape'] = int64_array(float_spatial) + elif node.type == 'Deconvolution': + # In case of given output_spatial_shape we calculate pads spatial + if node.has_valid('output_spatial_shape'): + if node.has_valid('get_pad'): + node['pad'] = node.get_pad(node, input_shape, kernel_shape) + else: + log.debug('Can\'t calculate paddings due to missing lambda get_pad in {} node'.format(node.id)) + return + else: + output_padding = node.output_padding[node.spatial_dims] if node.has_valid('output_padding') else None + float_spatial = Convolution.calc_deconvolution(node, input_spatial_shape, pad_spatial_shape, + kernel_extent, + output_padding) + node['output_spatial_shape'] = int64_array(float_spatial) + else: + return + + # For cases when group attribute wasn't set in extractor we should specify get_group attribute + # this attribute should store lambda node: ... (check tf convolution extractor) + if node.has_valid('get_group'): + node['group'] = node.get_group(node) + output_shape = np.full_like(input_shape, -1, dtype=np.int64) + output_shape[node.batch_dims] = input_shape[node.batch_dims] + output_shape[node.spatial_dims] = node.output_spatial_shape + + # For cases when output attribute wasn't set in extractor we should specify get_output_feature_dim attribute + # this attribute should store lambda node: ... (check tf convolution extractor) + if node.has_valid('get_output_feature_dim'): + node['output'] = node.get_output_feature_dim(node) + output_shape[node.channel_dims] = node.output + node['output_shape'] = output_shape + + for n in node.out_nodes(): + node.out_node(n).shape = output_shape + + mark_input_bins(node) + assign_dims_to_weights(node.in_node(weights_index), node.kernel_spatial_idx, node.input_feature_channel, + node.output_feature_channel, len(kernel_shape)) + + PermuteAttrs.create_permute_attrs(node, attrs=[('pad', 'input:0'), + ('stride', 'input:0'), + ('dilation', 'input:0'), + ('output_shape', 'input:0'), + ('batch_dims', 'input:0'), + ('channel_dims', 'input:0'), + ('spatial_dims', 'input:0'), + + ('kernel_shape', 'input:{}'.format(weights_index)), + ('kernel_spatial_idx', 'input:{}'.format(weights_index)), + ('input_feature_channel', 'input:{}'.format(weights_index)), + ('output_feature_channel', 'input:{}'.format(weights_index)), + ]) + + PermuteAttrs.set_permutation(node.in_node(weights_index), node, + node.get_weights_permute if node.has_valid('get_weights_permute') else None, + skip_if_exists=True) |