summaryrefslogtreecommitdiff
path: root/torch/nn/modules/fold.py
blob: 1b285b86672996569b71c4648f57effa73dc2861 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from .module import Module
from .. import functional as F


class Fold(Module):
    """
    De-interleaves vectors of length :math:`\prod(kernel_size)` from the "channel"
    dimension of the input tensor to generate blocks of size :math:`kernel_size`
    of the output.  These blocks populate the "spatial" dimensions [2:]
    of the output via a sliding window with positions determined by the
    padding, stride and dilation values.  The "channel" dimension 1 of the output
    is determined by the vectors interleaevd position in the "channel" dimension
    of the input.

    Each element of the output batch dimension 0 has :math:`C / \prod(kernel_size)`
    channels (dimension 1) and spatial dimensions [2:] of shape :math:`output_size`.

    | If :attr:`padding` is non-zero, then the input is implicitly
    zero-padded on both sides by :attr:`padding` number of points
    | :attr:`dilation` controls the intenal spacing between the kernel points in the output.
    It is harder to describe, but this `link`_ has a nice visualization of what
    dilation does.

    Args:
        output_size (int or tuple): the shape of the spatial dimensions [2:] of the output
        kernel_size (int or tuple): the size of the sliding blocks to convert
                                    to columns.
        stride (int or tuple): the stride of the sliding blocks in the input
                               spatial dimensions. Default: 1
        padding (int or tuple, optional): implicit zero padding to be added on
                                          both sides of input. Default: 0
        dilation (int or tuple, optional): a parameter that controls the
                                           stride of elements within the
                                           neighborhood. Default: 1

    | If :attr:`output_size`, :attr:`kernel_size`, :attr:`dilation`,
    :attr:`padding` or :attr:`stride` is of length 1 then
    their value will be replicated across all spatial dimensions

    | For the case of two output spatial dimensions this operation is sometimes called col2im

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N * C * \prod(kernel_size), L_{out},)` where
          :math:`L_{out} = floor((L_{in} + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)

    Examples::

        >>> # output_size (3, 3) kernel_size (2, 2), dilation (1, 1), padding (0, 0), stride (1, 1)
        >>> fold = nn.Fold((3, 3), (2, 2), (1, 1), (0, 0), (1, 1))
        >>> input = torch.randn(1, 36, 1)
        >>> output = unfold(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

    """

    def __init__(self, output_size, kernel_size, dilation=1, padding=0, stride=1):
        super(Fold, self).__init__()
        self.output_size = output_size
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    def forward(self, input):
        return F.fold(input, self.output_size, self.kernel_size, self.dilation,
                      self.padding, self.stride)

    def extra_repr(self):
        return 'output_size={output_size}, kernel_size={kernel_size}, ' \
            'dilation={dilation}, padding={padding}, stride={stride}'.format(
                **self.__dict__
            )


class Unfold(Module):
    """

    Converts each sliding :math:`kernel_size` block of the "spatial" dimensions [2:]
    of the input tensor into a column of the output. These columns are interleaved
    with the "channel" dimension 1 such that in the output the channel dimension combines
    both the spatial position of the block within the input and the original
    channel position. We denote size of the "batch" dimension 0 as :math:`N`.

    Each element of the output batch dimension 0 has :math:`C * \prod(kernel_size)`
    rows and contains as many columns as there are :math:`kernel_size` neighborhoods
    of the input according to the padding, stride and dilation values.

    | If :attr:`padding` is non-zero, then the input is implicitly
    zero-padded on both sides by :attr:`padding` number of points before reshaping
    | :attr:`dilation` controls the internal spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what
    dilation does.

    Args:
        kernel_size (int or tuple): the size of the sliding blocks to convert
                                    to columns.
        stride (int or tuple, optional): the stride of the sliding blocks in the input
                                         spatial dimensions. Default: 1
        padding (int or tuple, optional): implicit zero padding to be added on
                                          both sides of input. Default: 0
        dilation (int or tuple, optional): a parameter that controls the
                                           stride of elements within the
                                           neighborhood. Default: 1

    | If :attr:`kernel_size`, :attr:`dilation`, :attr:`padding` or :attr:`stride`
    is of length 1 then their value will be replicated across all spatial dimensions

    | For the case of two input spatial dimensions this operation is sometimes called im2col

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C * \prod(kernel_size), L_{out},)` where
          :math:`L_{out} = floor((L_{in} + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)

    Examples::

        >>> # kernel_size (2, 2), dilation (1, 1), padding (0, 0), stride (1, 1)
        >>> unfold = nn.Unfold((3, 3), (1, 1), (0, 0), (1, 1))
        >>> input = torch.randn(2, 4, 3, 3)
        >>> output = unfold(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

    """

    def __init__(self, kernel_size, dilation=1, padding=0, stride=1):
        super(Unfold, self).__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    def forward(self, input):
        return F.unfold(input, self.kernel_size, self.dilation,
                        self.padding, self.stride)

    def extra_repr(self):
        return 'kernel_size={kernel_size}, dilation={dilation}, padding={padding},' \
            ' stride={stride}'.format(**self.__dict__)