Diffstat (limited to 'torch/nn/modules/rnn.py')
-rw-r--r-- | torch/nn/modules/rnn.py | 214
1 file changed, 128 insertions, 86 deletions
diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py
index e1a7bc4af2..26aed4acdb 100644
--- a/torch/nn/modules/rnn.py
+++ b/torch/nn/modules/rnn.py
@@ -133,7 +133,8 @@ class RNNBase(Module):
 
 
 class RNN(RNNBase):
-    r"""Applies a multi-layer Elman RNN with tanh or ReLU non-linearity to an input sequence.
+    r"""Applies a multi-layer Elman RNN with tanh or ReLU non-linearity to an
+    input sequence.
 
     For each element in the input sequence, each layer computes the following
@@ -143,40 +144,49 @@ class RNN(RNNBase):
 
         h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})
 
-    where :math:`h_t` is the hidden state at time `t`, and :math:`x_t` is the hidden
-    state of the previous layer at time `t` or :math:`input_t` for the first layer.
-    If nonlinearity='relu', then `ReLU` is used instead of `tanh`.
+    where :math:`h_t` is the hidden state at time `t`, and :math:`x_t` is
+    the hidden state of the previous layer at time `t` or :math:`input_t`
+    for the first layer. If nonlinearity='relu', then `ReLU` is used instead
+    of `tanh`.
 
     Args:
         input_size: The number of expected features in the input x
         hidden_size: The number of features in the hidden state h
         num_layers: Number of recurrent layers.
         nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
-        bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
-        batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
-        dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
+        bias: If False, then the layer does not use bias weights b_ih and b_hh.
+            Default: True
+        batch_first: If True, then the input and output tensors are provided
+            as (batch, seq, feature)
+        dropout: If non-zero, introduces a dropout layer on the outputs of each
+            RNN layer except the last layer
         bidirectional: If True, becomes a bidirectional RNN. Default: False
 
     Inputs: input, h_0
-        - **input** (seq_len, batch, input_size): tensor containing the features of the input sequence.
-          The input can also be a packed variable length sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
+        - **input** (seq_len, batch, input_size): tensor containing the features
+          of the input sequence. The input can also be a packed variable length
+          sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
           for details.
-        - **h_0** (num_layers * num_directions, batch, hidden_size): tensor containing the initial hidden state
-          for each element in the batch.
+        - **h_0** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the initial hidden state for each element in the batch.
 
     Outputs: output, h_n
-        - **output** (seq_len, batch, hidden_size * num_directions): tensor containing the output features (h_k)
-          from the last layer of the RNN, for each k. If a :class:`torch.nn.utils.rnn.PackedSequence` has been given
-          as the input, the output will also be a packed sequence.
-        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for k=seq_len.
+        - **output** (seq_len, batch, hidden_size * num_directions): tensor
+          containing the output features (h_k) from the last layer of the RNN,
+          for each k. If a :class:`torch.nn.utils.rnn.PackedSequence` has
+          been given as the input, the output will also be a packed sequence.
+        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the hidden state for k=seq_len.
 
     Attributes:
         weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
-            of shape `(input_size x hidden_size)`
+            of shape `(input_size x hidden_size)`
         weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
-            of shape `(hidden_size x hidden_size)`
-        bias_ih_l[k]: the learnable input-hidden bias of the k-th layer, of shape `(hidden_size)`
-        bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer, of shape `(hidden_size)`
+            of shape `(hidden_size x hidden_size)`
+        bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
+            of shape `(hidden_size)`
+        bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
+            of shape `(hidden_size)`
 
     Examples::
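For reference, a minimal shape-check sketch of the nn.RNN interface described in this docstring (not part of the commit; the sizes are arbitrary, and the Variable wrapper matches the PyTorch version this diff targets):

    >>> import torch
    >>> import torch.nn as nn
    >>> from torch.autograd import Variable
    >>> rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2)
    >>> input = Variable(torch.randn(5, 3, 10))   # (seq_len, batch, input_size)
    >>> h0 = Variable(torch.randn(2, 3, 20))      # (num_layers * num_directions, batch, hidden_size)
    >>> output, hn = rnn(input, h0)
    >>> output.size()                             # (seq_len, batch, hidden_size * num_directions)
    torch.Size([5, 3, 20])
    >>> hn.size()
    torch.Size([2, 3, 20])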
@@ -203,7 +213,8 @@ class RNN(RNNBase):
 
 
 class LSTM(RNNBase):
-    r"""Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
+    r"""Applies a multi-layer long short-term memory (LSTM) RNN to an input
+    sequence.
 
     For each element in the input sequence, each layer computes the following
@@ -220,47 +231,54 @@ class LSTM(RNNBase):
             h_t = o_t * \tanh(c_t)
         \end{array}
 
-    where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`,
-    :math:`x_t` is the hidden state of the previous layer at time `t` or :math:`input_t` for the first layer,
-    and :math:`i_t`, :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget,
-    cell, and out gates, respectively.
+    where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell
+    state at time `t`, :math:`x_t` is the hidden state of the previous layer at
+    time `t` or :math:`input_t` for the first layer, and :math:`i_t`,
+    :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell,
+    and out gates, respectively.
 
     Args:
         input_size: The number of expected features in the input x
        hidden_size: The number of features in the hidden state h
         num_layers: Number of recurrent layers.
-        bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
-        batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
-        dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
+        bias: If False, then the layer does not use bias weights b_ih and b_hh.
+            Default: True
+        batch_first: If True, then the input and output tensors are provided
+            as (batch, seq, feature)
+        dropout: If non-zero, introduces a dropout layer on the outputs of each
+            RNN layer except the last layer
         bidirectional: If True, becomes a bidirectional RNN. Default: False
 
     Inputs: input, (h_0, c_0)
-        - **input** (seq_len, batch, input_size): tensor containing the features of the input sequence.
-          The input can also be a packed variable length sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
-          for details.
-        - **h_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
-          the initial hidden state for each element in the batch.
-        - **c_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
-          the initial cell state for each element in the batch.
+        - **input** (seq_len, batch, input_size): tensor containing the features
+          of the input sequence.
+          The input can also be a packed variable length sequence.
+          See :func:`torch.nn.utils.rnn.pack_padded_sequence` for details.
+        - **h_0** (num_layers \* num_directions, batch, hidden_size): tensor
+          containing the initial hidden state for each element in the batch.
+        - **c_0** (num_layers \* num_directions, batch, hidden_size): tensor
+          containing the initial cell state for each element in the batch.
 
     Outputs: output, (h_n, c_n)
-        - **output** (seq_len, batch, hidden_size * num_directions): tensor containing
-          the output features `(h_t)` from the last layer of the RNN, for each t. If a
-          :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output will also be a
-          packed sequence.
-        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for t=seq_len
-        - **c_n** (num_layers * num_directions, batch, hidden_size): tensor containing the cell state for t=seq_len
+        - **output** (seq_len, batch, hidden_size * num_directions): tensor
+          containing the output features `(h_t)` from the last layer of the RNN,
+          for each t. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
+          given as the input, the output will also be a packed sequence.
+        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the hidden state for t=seq_len
+        - **c_n** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the cell state for t=seq_len
 
     Attributes:
-        weight_ih_l[k] : the learnable input-hidden weights of the k-th layer `(W_ii|W_if|W_ig|W_io)`, of shape
-            `(4*hidden_size x input_size)`
-        weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer `(W_hi|W_hf|W_hg|W_ho)`, of shape
-            `(4*hidden_size x hidden_size)`
-        bias_ih_l[k] : the learnable input-hidden bias of the k-th layer `(b_ii|b_if|b_ig|b_io)`, of shape
-            `(4*hidden_size)`
-        bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer `(b_hi|b_hf|b_hg|b_ho)`, of shape
-            `(4*hidden_size)`
+        weight_ih_l[k] : the learnable input-hidden weights of the k-th layer
+            `(W_ii|W_if|W_ig|W_io)`, of shape `(4*hidden_size x input_size)`
+        weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer
+            `(W_hi|W_hf|W_hg|W_ho)`, of shape `(4*hidden_size x hidden_size)`
+        bias_ih_l[k] : the learnable input-hidden bias of the k-th layer
+            `(b_ii|b_if|b_ig|b_io)`, of shape `(4*hidden_size)`
+        bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer
+            `(b_hi|b_hf|b_hg|b_ho)`, of shape `(4*hidden_size)`
 
     Examples::
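A matching sketch for the (h_0, c_0) / (h_n, c_n) convention documented for nn.LSTM (illustrative only; assumes the same imports as the nn.RNN sketch above):

    >>> lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
    >>> input = Variable(torch.randn(5, 3, 10))   # (seq_len, batch, input_size)
    >>> h0 = Variable(torch.randn(2, 3, 20))      # (num_layers * num_directions, batch, hidden_size)
    >>> c0 = Variable(torch.randn(2, 3, 20))      # same shape as h0
    >>> output, (hn, cn) = lstm(input, (h0, c0))
    >>> output.size(), hn.size(), cn.size()
    (torch.Size([5, 3, 20]), torch.Size([2, 3, 20]), torch.Size([2, 3, 20]))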
@@ -292,40 +310,47 @@ class GRU(RNNBase):
         \end{array}
 
     where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the hidden
-    state of the previous layer at time `t` or :math:`input_t` for the first layer,
-    and :math:`r_t`, :math:`z_t`, :math:`n_t` are the reset, input, and new gates, respectively.
+    state of the previous layer at time `t` or :math:`input_t` for the first
+    layer, and :math:`r_t`, :math:`z_t`, :math:`n_t` are the reset, input,
+    and new gates, respectively.
 
     Args:
         input_size: The number of expected features in the input x
         hidden_size: The number of features in the hidden state h
         num_layers: Number of recurrent layers.
-        bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
-        batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
-        dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
+        bias: If False, then the layer does not use bias weights b_ih and b_hh.
+            Default: True
+        batch_first: If True, then the input and output tensors are provided
+            as (batch, seq, feature)
+        dropout: If non-zero, introduces a dropout layer on the outputs of each
+            RNN layer except the last layer
         bidirectional: If True, becomes a bidirectional RNN. Default: False
 
     Inputs: input, h_0
-        - **input** (seq_len, batch, input_size): tensor containing the features of the input sequence.
-          The input can also be a packed variable length sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
+        - **input** (seq_len, batch, input_size): tensor containing the features
+          of the input sequence. The input can also be a packed variable length
+          sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
           for details.
-        - **h_0** (num_layers * num_directions, batch, hidden_size): tensor containing the initial
-          hidden state for each element in the batch.
+        - **h_0** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the initial hidden state for each element in the batch.
 
     Outputs: output, h_n
-        - **output** (seq_len, batch, hidden_size * num_directions): tensor containing the output features h_t from
-          the last layer of the RNN, for each t. If a :class:`torch.nn.utils.rnn.PackedSequence` has been given as the
-          input, the output will also be a packed sequence.
-        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for t=seq_len
+        - **output** (seq_len, batch, hidden_size * num_directions): tensor
+          containing the output features h_t from the last layer of the RNN,
+          for each t. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
+          given as the input, the output will also be a packed sequence.
+        - **h_n** (num_layers * num_directions, batch, hidden_size): tensor
+          containing the hidden state for t=seq_len
 
     Attributes:
-        weight_ih_l[k] : the learnable input-hidden weights of the k-th layer (W_ir|W_iz|W_in), of shape
-            `(3*hidden_size x input_size)`
-        weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer (W_hr|W_hz|W_hn), of shape
-            `(3*hidden_size x hidden_size)`
-        bias_ih_l[k] : the learnable input-hidden bias of the k-th layer (b_ir|b_iz|b_in), of shape
-            `(3*hidden_size)`
-        bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer (b_hr|b_hz|b_hn), of shape
-            `(3*hidden_size)`
+        weight_ih_l[k] : the learnable input-hidden weights of the k-th layer
+            (W_ir|W_iz|W_in), of shape `(3*hidden_size x input_size)`
+        weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer
+            (W_hr|W_hz|W_hn), of shape `(3*hidden_size x hidden_size)`
+        bias_ih_l[k] : the learnable input-hidden bias of the k-th layer
+            (b_ir|b_iz|b_in), of shape `(3*hidden_size)`
+        bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer
+            (b_hr|b_hz|b_hn), of shape `(3*hidden_size)`
 
     Examples::
 
         >>> rnn = nn.GRU(10, 20, 2)
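Since all three module docstrings point to :func:`torch.nn.utils.rnn.pack_padded_sequence`, here is a hedged sketch of the packed-input path using nn.GRU (lengths must be sorted in decreasing order; the padded batch and its lengths are arbitrary placeholders):

    >>> from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
    >>> gru = nn.GRU(10, 20, 2)
    >>> padded = Variable(torch.randn(5, 3, 10))       # (seq_len, batch, input_size), zero-padded
    >>> packed = pack_padded_sequence(padded, [5, 3, 1])
    >>> packed_output, hn = gru(packed)                # output is also a PackedSequence
    >>> output, lengths = pad_packed_sequence(packed_output)  # back to (seq_len, batch, 20)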
@@ -362,19 +387,24 @@ class RNNCell(RNNCellBase):
     Args:
         input_size: The number of expected features in the input x
         hidden_size: The number of features in the hidden state h
-        bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
+        bias: If False, then the layer does not use bias weights b_ih and b_hh.
+            Default: True
         nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
 
     Inputs: input, hidden
         - **input** (batch, input_size): tensor containing input features
-        - **hidden** (batch, hidden_size): tensor containing the initial hidden state for each element in the batch.
+        - **hidden** (batch, hidden_size): tensor containing the initial hidden
+          state for each element in the batch.
 
     Outputs: h'
-        - **h'** (batch, hidden_size): tensor containing the next hidden state for each element in the batch
+        - **h'** (batch, hidden_size): tensor containing the next hidden state
+          for each element in the batch
 
     Attributes:
-        weight_ih: the learnable input-hidden weights, of shape `(input_size x hidden_size)`
-        weight_hh: the learnable hidden-hidden weights, of shape `(hidden_size x hidden_size)`
+        weight_ih: the learnable input-hidden weights, of shape
+            `(input_size x hidden_size)`
+        weight_hh: the learnable hidden-hidden weights, of shape
+            `(hidden_size x hidden_size)`
         bias_ih: the learnable input-hidden bias, of shape `(hidden_size)`
         bias_hh: the learnable hidden-hidden bias, of shape `(hidden_size)`
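A minimal sketch of driving an nn.RNNCell step by step, the cell-level counterpart of the sequence modules above (shapes illustrative; same imports as the earlier sketches):

    >>> cell = nn.RNNCell(10, 20)
    >>> input = Variable(torch.randn(6, 3, 10))   # (seq_len, batch, input_size)
    >>> hx = Variable(torch.zeros(3, 20))         # (batch, hidden_size)
    >>> outputs = []
    >>> for i in range(6):
    ...     hx = cell(input[i], hx)               # one time step per call
    ...     outputs.append(hx)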
@@ -443,20 +473,27 @@ class LSTMCell(RNNCellBase):
     Args:
         input_size: The number of expected features in the input x
         hidden_size: The number of features in the hidden state h
-        bias: If `False`, then the layer does not use bias weights `b_ih` and `b_hh`. Default: True
+        bias: If `False`, then the layer does not use bias weights `b_ih` and
+            `b_hh`. Default: True
 
     Inputs: input, (h_0, c_0)
         - **input** (batch, input_size): tensor containing input features
-        - **h_0** (batch, hidden_size): tensor containing the initial hidden state for each element in the batch.
-        - **c_0** (batch. hidden_size): tensor containing the initial cell state for each element in the batch.
+        - **h_0** (batch, hidden_size): tensor containing the initial hidden
+          state for each element in the batch.
+        - **c_0** (batch, hidden_size): tensor containing the initial cell state
+          for each element in the batch.
 
     Outputs: h_1, c_1
-        - **h_1** (batch, hidden_size): tensor containing the next hidden state for each element in the batch
-        - **c_1** (batch, hidden_size): tensor containing the next cell state for each element in the batch
+        - **h_1** (batch, hidden_size): tensor containing the next hidden state
+          for each element in the batch
+        - **c_1** (batch, hidden_size): tensor containing the next cell state
+          for each element in the batch
 
     Attributes:
-        weight_ih: the learnable input-hidden weights, of shape `(input_size x hidden_size)`
-        weight_hh: the learnable hidden-hidden weights, of shape `(hidden_size x hidden_size)`
+        weight_ih: the learnable input-hidden weights, of shape
+            `(input_size x hidden_size)`
+        weight_hh: the learnable hidden-hidden weights, of shape
+            `(hidden_size x hidden_size)`
         bias_ih: the learnable input-hidden bias, of shape `(hidden_size)`
        bias_hh: the learnable hidden-hidden bias, of shape `(hidden_size)`
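The same unrolled pattern for nn.LSTMCell, which carries the (hidden, cell) state pair documented above (a sketch, not the module's canonical example):

    >>> cell = nn.LSTMCell(10, 20)
    >>> input = Variable(torch.randn(6, 3, 10))   # (seq_len, batch, input_size)
    >>> hx = Variable(torch.zeros(3, 20))         # h_0: (batch, hidden_size)
    >>> cx = Variable(torch.zeros(3, 20))         # c_0: (batch, hidden_size)
    >>> for i in range(6):
    ...     hx, cx = cell(input[i], (hx, cx))     # returns (h_1, c_1)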
@@ -515,18 +552,23 @@ class GRUCell(RNNCellBase):
     Args:
         input_size: The number of expected features in the input x
         hidden_size: The number of features in the hidden state h
-        bias: If `False`, then the layer does not use bias weights `b_ih` and `b_hh`. Default: `True`
+        bias: If `False`, then the layer does not use bias weights `b_ih` and
+            `b_hh`. Default: `True`
 
     Inputs: input, hidden
         - **input** (batch, input_size): tensor containing input features
-        - **hidden** (batch, hidden_size): tensor containing the initial hidden state for each element in the batch.
+        - **hidden** (batch, hidden_size): tensor containing the initial hidden
+          state for each element in the batch.
 
     Outputs: h'
-        - **h'**: (batch, hidden_size): tensor containing the next hidden state for each element in the batch
+        - **h'**: (batch, hidden_size): tensor containing the next hidden state
+          for each element in the batch
 
     Attributes:
-        weight_ih: the learnable input-hidden weights, of shape `(input_size x hidden_size)`
-        weight_hh: the learnable hidden-hidden weights, of shape `(hidden_size x hidden_size)`
+        weight_ih: the learnable input-hidden weights, of shape
+            `(input_size x hidden_size)`
+        weight_hh: the learnable hidden-hidden weights, of shape
+            `(hidden_size x hidden_size)`
         bias_ih: the learnable input-hidden bias, of shape `(hidden_size)`
         bias_hh: the learnable hidden-hidden bias, of shape `(hidden_size)`
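Finally, a single-step sketch for nn.GRUCell, whose interface mirrors nn.RNNCell (illustrative shapes, same imports as above):

    >>> cell = nn.GRUCell(10, 20)
    >>> input = Variable(torch.randn(3, 10))      # (batch, input_size)
    >>> hx = Variable(torch.zeros(3, 20))         # (batch, hidden_size)
    >>> hx = cell(input, hx)                      # next hidden state h', same shape as hx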