From 1b48f2f751291299648b360e901c62e3803de0d9 Mon Sep 17 00:00:00 2001
From: smallv0221 <33639025+smallv0221@users.noreply.github.com>
Date: Tue, 13 Oct 2020 20:10:04 +0800
Subject: [PATCH] Fix en doc for rnn.py. test=document_fix (#27835)

* Fix en doc for rnn.py. test=document_fix
---
 python/paddle/nn/layer/rnn.py | 161 +++++++++++++++++++++++-----------
 1 file changed, 110 insertions(+), 51 deletions(-)

diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py
index 0687fefe005..7e9c31f422c 100644
--- a/python/paddle/nn/layer/rnn.py
+++ b/python/paddle/nn/layer/rnn.py
@@ -48,7 +48,7 @@ def split_states(states, bidirectional=False, state_components=1):
    Split states of an RNN network into a possibly nested list or tuple of
    states of each RNN cell of the RNN network.

-    Arguments:
+    Parameters:
        states (Tensor|tuple|list): the concatenated states for the RNN network.
            When `state_components` is 1, states is a Tensor with shape
            `(L*D, N, C)` where `L` is the number of layers of the RNN
@@ -101,7 +101,7 @@ def concat_states(states, bidirectional=False, state_components=1):
    Concatenate a possibly nested list or tuple of RNN cell states into a
    compact form.

-    Arguments:
+    Parameters:
        states (list|tuple): a possibly nested list or tuple of RNN cell
            states.
            If `bidirectional` is True, it can be indexed twice to get an
@@ -154,13 +154,14 @@ class RNNCellBase(Layer):
        r"""
        Generate initialized states according to provided shape, data type and
        value.
-        Arguments:
+
+        Parameters:
            batch_ref (Tensor): A tensor, whose shape would be used to
                determine the batch size, which is used to generate initial
                states. For `batch_ref`'s shape d, `d[batch_dim_idx]` is
                treated as batch size.
            shape (list|tuple, optional): A (possibly nested structure of) shape[s],
-                where a shape is a list/tuple of integer). `-1` (for batch size)
+                where a shape is a list/tuple of integers. `-1` (for batch size)
                will be automatically prepended if a shape does not start with
                it. If None, property `state_shape` will be used. Defaults to
                None.
@@ -174,6 +175,7 @@ class RNNCellBase(Layer):
                Defaults to 0.
            batch_dim_idx (int, optional): An integer indicating which
                dimension of `batch_ref` represents batch. Defaults to 0.
+
        Returns:
            init_states (Tensor|tuple|list): tensor of the provided shape and
                dtype, or list of tensors that each satisfies the requirements,
@@ -268,16 +270,14 @@ class SimpleRNNCell(RNNCellBase):
    The formula used is as follows:

    .. math::
-        h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
+        h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})
+
        y_{t} & = h_{t}
-
-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
-    multiplication operator.

    Please refer to `Finding Structure in Time 
    `_ for more details.

-    Arguments:
+    Parameters:
        input_size (int): The input size.
        hidden_size (int): The hidden size.
        activation (str, optional): The activation in the SimpleRNN cell.
@@ -293,7 +293,7 @@ class SimpleRNNCell(RNNCellBase):
        name (str, optional): Name for the operation (optional, default is
            None). For more information, please refer to :ref:`api_guide_Name`.

-    Parameters:
+    Attributes:
        weight_ih (Parameter): shape (hidden_size, input_size), input to hidden
            weight, corresponding to :math:`W_{ih}` in the formula.
        weight_hh (Parameter): shape (hidden_size, hidden_size), hidden to
@@ -329,13 +329,15 @@ class SimpleRNNCell(RNNCellBase):
    Examples:

        .. code-block:: python

            import paddle
-            paddle.disable_static()

            x = paddle.randn((4, 16))
            prev_h = paddle.randn((4, 32))

            cell = paddle.nn.SimpleRNNCell(16, 32)
            y, h = cell(x, prev_h)
+            print(y.shape)
+
+            #[4,32]

    """
@@ -407,20 +409,26 @@ class LSTMCell(RNNCellBase):
    .. math::

        i_{t} & = \sigma(W_{ii}x_{t} + b_{ii} + W_{hi}h_{t-1} + b_{hi})
+
        f_{t} & = \sigma(W_{if}x_{t} + b_{if} + W_{hf}h_{t-1} + b_{hf})
+
        o_{t} & = \sigma(W_{io}x_{t} + b_{io} + W_{ho}h_{t-1} + b_{ho})
-        \\widetilde{c}_{t} & = \\tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
-        c_{t} & = f_{t} \* c{t-1} + i{t} \* \\widetile{c}_{t}
-        h_{t} & = o_{t} \* \\tanh(c_{t})
+
+        \widetilde{c}_{t} & = \tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
+
+        c_{t} & = f_{t} * c_{t-1} + i_{t} * \widetilde{c}_{t}
+
+        h_{t} & = o_{t} * \tanh(c_{t})
+
        y_{t} & = h_{t}

-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
+    where :math:`\sigma` is the sigmoid function, and * is the elementwise
    multiplication operator.

    Please refer to `An Empirical Exploration of
    Recurrent Network Architectures 
    `_ for more details.

-    Arguments:
+    Parameters:
        input_size (int): The input size.
        hidden_size (int): The hidden size.
        weight_ih_attr(ParamAttr, optional): The parameter attribute for
@@ -434,7 +442,7 @@ class LSTMCell(RNNCellBase):
        name (str, optional): Name for the operation (optional, default is
            None). For more information, please refer to :ref:`api_guide_Name`.

-    Parameters:
+    Attributes:
        weight_ih (Parameter): shape (4 * hidden_size, input_size), input to
            hidden weight, which corresponds to the concatenation of
            :math:`W_{ii}, W_{if}, W_{ig}, W_{io}` in the formula.
@@ -462,7 +470,7 @@ class LSTMCell(RNNCellBase):
            corresponding to :math:`h_{t}` in the formula.
        states (tuple): a tuple of two tensors, each of shape
            `[batch_size, hidden_size]`, the new hidden states,
-            corresponding to :math:`h_{t}, c{t}` in the formula.
+            corresponding to :math:`h_{t}, c_{t}` in the formula.

    Notes:
        All the weights and bias are initialized with `Uniform(-std, std)` by
@@ -475,7 +483,6 @@ class LSTMCell(RNNCellBase):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            x = paddle.randn((4, 16))
            prev_h = paddle.randn((4, 32))
@@ -484,6 +491,14 @@ class LSTMCell(RNNCellBase):
            cell = paddle.nn.LSTMCell(16, 32)
            y, (h, c) = cell(x, (prev_h, prev_c))

+            print(y.shape)
+            print(h.shape)
+            print(c.shape)
+
+            #[4,32]
+            #[4,32]
+            #[4,32]
+
    """

    def __init__(self,
@@ -559,15 +574,19 @@ class GRUCell(RNNCellBase):
    The formula for GRU used is as follows:

-    .. math:: 
+    .. math::

        r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
-        z_{t} & = \sigma(W_{iz)x_{t} + b_{iz} + W_{hz}x_{t} + b_{hz})
-        \\widetilde{h}_{t} & = \\tanh(W_{ic)x_{t} + b_{ic} + r_{t} \* (W_{hc}x_{t} + b{hc}))
-        h_{t} & = z_{t} \* h_{t-1} + (1 - z_{t}) \* \\widetilde{h}_{t}
+
+        z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})
+
+        \widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))
+
+        h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}
+
        y_{t} & = h_{t}

-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
+    where :math:`\sigma` is the sigmoid function, and * is the elementwise
    multiplication operator.

    Please refer to `An Empirical Exploration of
    Recurrent Network Architectures 
    `_ for more details.

-    Arguments:
+    Parameters:
        input_size (int): The input size.
        hidden_size (int): The hidden size.
        weight_ih_attr(ParamAttr, optional): The parameter attribute for
            `weight_ih`. Default: None.
        weight_hh_attr(ParamAttr, optional): The parameter attribute for
            `weight_hh`. Default: None.
        bias_ih_attr (ParamAttr, optional): The parameter attribute for the
            `bias_ih`. Default: None.
        bias_hh_attr (ParamAttr, optional): The parameter attribute for the
            `bias_hh`. Default: None.
        name (str, optional): Name for the operation (optional, default is
            None). For more information, please refer to :ref:`api_guide_Name`.
-    Parameters:
+    Attributes:
        weight_ih (Parameter): shape (3 * hidden_size, input_size), input to
            hidden weight, which corresponds to the concatenation of
            :math:`W_{ir}, W_{iz}, W_{ic}` in the formula.
@@ -625,7 +644,6 @@ class GRUCell(RNNCellBase):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            x = paddle.randn((4, 16))
            prev_h = paddle.randn((4, 32))
@@ -633,6 +651,12 @@ class GRUCell(RNNCellBase):
            cell = paddle.nn.GRUCell(16, 32)
            y, h = cell(x, prev_h)

+            print(y.shape)
+            print(h.shape)
+
+            #[4,32]
+            #[4,32]
+
    """

    def __init__(self,
@@ -707,7 +731,7 @@ class RNN(Layer):
    It performs :code:`cell.forward()` repeatedly until it reaches the
    maximum length of `inputs`.

-    Arguments:
+    Parameters:
        cell(RNNCellBase): An instance of `RNNCellBase`.
        is_reverse (bool, optional): Indicate whether to calculate in the
            reverse order of input sequences. Defaults to False.
@@ -717,8 +741,8 @@ class RNN(Layer):
    Inputs:
        inputs (Tensor): A (possibly nested structure of) tensor[s]. The input
            sequences.
-            If time major is True, the shape is `[batch_size, time_steps, input_size]`
-            If time major is False, the shape is [time_steps, batch_size, input_size]`
+            If time major is False, the shape is `[batch_size, time_steps, input_size]`
+            If time major is True, the shape is `[time_steps, batch_size, input_size]`
            where `input_size` is the input size of the cell.
        initial_states (Tensor|list|tuple, optional): Tensor of a possibly
            nested structure of tensors, representing the initial state for
@@ -753,7 +777,6 @@ class RNN(Layer):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            inputs = paddle.rand((4, 23, 16))
            prev_h = paddle.randn((4, 32))
@@ -762,6 +785,12 @@ class RNN(Layer):
            rnn = paddle.nn.RNN(cell)
            outputs, final_states = rnn(inputs, prev_h)

+            print(outputs.shape)
+            print(final_states.shape)
+
+            #[4,23,32]
+            #[4,32]
+
    """

    def __init__(self, cell, is_reverse=False, time_major=False):
@@ -795,7 +824,7 @@ class BiRNN(Layer):
    backward RNN with corresponding cells separately and concatenates the
    outputs along the last axis.

-    Arguments:
+    Parameters:
        cell_fw (RNNCellBase): A RNNCellBase instance used for forward RNN.
        cell_bw (RNNCellBase): A RNNCellBase instance used for backward RNN.
        time_major (bool): Whether the first dimension of the input means the
@@ -841,7 +870,6 @@ class BiRNN(Layer):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            cell_fw = paddle.nn.LSTMCell(16, 32)
            cell_bw = paddle.nn.LSTMCell(16, 32)
@@ -850,6 +878,12 @@ class BiRNN(Layer):
            inputs = paddle.rand((2, 23, 16))
            outputs, final_states = rnn(inputs)

+            print(outputs.shape)
+            print(final_states[0][0].shape, len(final_states), len(final_states[0]))
+
+            #[2,23,64]
+            #[2,32] 2 2
+
    """

    def __init__(self, cell_fw, cell_bw, time_major=False):
@@ -936,13 +970,11 @@ class SimpleRNN(RNNMixin):

    .. math::

-        h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
+        h_{t} & = \mathrm{tanh}(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})
+
        y_{t} & = h_{t}
-
-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
-    multiplication operator.

-    Arguments:
+    Parameters:
        input_size (int): The input size for the first layer's cell.
        hidden_size (int): The hidden size for each layer's cell.
        num_layers (int, optional): Number of layers. Defaults to 1.
@@ -997,7 +1029,6 @@ class SimpleRNN(RNNMixin):
    Examples:

        .. code-block:: python

            import paddle
-            paddle.disable_static()

            rnn = paddle.nn.SimpleRNN(16, 32, 2)
@@ -1005,6 +1036,12 @@ class SimpleRNN(RNNMixin):
            x = paddle.randn((4, 23, 16))
            prev_h = paddle.randn((2, 4, 32))
            y, h = rnn(x, prev_h)

+            print(y.shape)
+            print(h.shape)
+
+            #[4,23,32]
+            #[2,4,32]
+
    """

    def __init__(self,
@@ -1077,17 +1114,23 @@ class LSTM(RNNMixin):
    .. math::

        i_{t} & = \sigma(W_{ii}x_{t} + b_{ii} + W_{hi}h_{t-1} + b_{hi})
+
        f_{t} & = \sigma(W_{if}x_{t} + b_{if} + W_{hf}h_{t-1} + b_{hf})
+
        o_{t} & = \sigma(W_{io}x_{t} + b_{io} + W_{ho}h_{t-1} + b_{ho})
-        \\widetilde{c}_{t} & = \\tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
-        c_{t} & = f_{t} \* c{t-1} + i{t} \* \\widetile{c}_{t}
-        h_{t} & = o_{t} \* \\tanh(c_{t})
+
+        \widetilde{c}_{t} & = \tanh (W_{ig}x_{t} + b_{ig} + W_{hg}h_{t-1} + b_{hg})
+
+        c_{t} & = f_{t} * c_{t-1} + i_{t} * \widetilde{c}_{t}
+
+        h_{t} & = o_{t} * \tanh(c_{t})
+
        y_{t} & = h_{t}

-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
+    where :math:`\sigma` is the sigmoid function, and * is the elementwise
    multiplication operator.

-    Arguments:
+    Parameters:
        input_size (int): The input size for the first layer's cell.
        hidden_size (int): The hidden size for each layer's cell.
        num_layers (int, optional): Number of layers. Defaults to 1.
@@ -1130,7 +1173,7 @@ class LSTM(RNNMixin):
            `[batch_size, time_steps, num_directions * hidden_size]`.
            Note that `num_directions` is 2 if direction is "bidirectional"
            else 1.
-        final_states (Tensor): the final state, a tuple of two tensors, h and c.
+        final_states (tuple): the final state, a tuple of two tensors, h and c.
            The shape of each is
            `[num_layers * num_directions, batch_size, hidden_size]`.
            Note that `num_directions` is 2 if direction is "bidirectional"
@@ -1141,7 +1184,6 @@ class LSTM(RNNMixin):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            rnn = paddle.nn.LSTM(16, 32, 2)
@@ -1150,6 +1192,14 @@ class LSTM(RNNMixin):
            x = paddle.randn((4, 23, 16))
            prev_h = paddle.randn((2, 4, 32))
            prev_c = paddle.randn((2, 4, 32))
            y, (h, c) = rnn(x, (prev_h, prev_c))

+            print(y.shape)
+            print(h.shape)
+            print(c.shape)
+
+            #[4,23,32]
+            #[2,4,32]
+            #[2,4,32]
+
    """

    def __init__(self,
@@ -1215,15 +1265,19 @@ class GRU(RNNMixin):

    .. math::

        r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
-        z_{t} & = \sigma(W_{iz)x_{t} + b_{iz} + W_{hz}x_{t} + b_{hz})
-        \\widetilde{h}_{t} & = \\tanh(W_{ic)x_{t} + b_{ic} + r_{t} \* (W_{hc}x_{t} + b{hc}))
-        h_{t} & = z_{t} \* h_{t-1} + (1 - z_{t}) \* \\widetilde{h}_{t}
+
+        z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})
+
+        \widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))
+
+        h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}
+
        y_{t} & = h_{t}

-    where :math:`\sigma` is the sigmoid fucntion, and \* is the elemetwise
+    where :math:`\sigma` is the sigmoid function, and * is the elementwise
    multiplication operator.

-    Arguments:
+    Parameters:
        input_size (int): The input size for the first layer's cell.
        hidden_size (int): The hidden size for each layer's cell.
        num_layers (int, optional): Number of layers. Defaults to 1.
@@ -1277,7 +1331,6 @@ class GRU(RNNMixin):
        .. code-block:: python

            import paddle
-            paddle.disable_static()

            rnn = paddle.nn.GRU(16, 32, 2)
@@ -1285,6 +1338,12 @@ class GRU(RNNMixin):
            x = paddle.randn((4, 23, 16))
            prev_h = paddle.randn((2, 4, 32))
            y, h = rnn(x, prev_h)

+            print(y.shape)
+            print(h.shape)
+
+            #[4,23,32]
+            #[2,4,32]
+
    """

    def __init__(self,
-- 
GitLab
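For quick verification, the examples touched by this patch can be combined into one script. This is a minimal sketch, not part of the patch: it assumes a PaddlePaddle 2.x dynamic-graph environment (the default, which is why the `paddle.disable_static()` calls are removed above), and the variable names `seq`, `out`, `hn`, `cn` are illustrative. It only uses calls that already appear in the docstrings.

.. code-block:: python

    import paddle

    # One step of a SimpleRNN cell: input size 16 -> hidden size 32.
    x = paddle.randn((4, 16))
    prev_h = paddle.randn((4, 32))
    cell = paddle.nn.SimpleRNNCell(16, 32)
    y, h = cell(x, prev_h)
    print(y.shape)  # [4, 32]

    # The RNN wrapper unrolls the cell over a [batch, time, input]
    # sequence (time_major=False is the default).
    inputs = paddle.rand((4, 23, 16))
    rnn = paddle.nn.RNN(cell)
    outputs, final_states = rnn(inputs, prev_h)
    print(outputs.shape)       # [4, 23, 32]
    print(final_states.shape)  # [4, 32]

    # Multi-layer LSTM: initial and final states carry a leading
    # num_layers * num_directions dimension (here 2 * 1 = 2).
    lstm = paddle.nn.LSTM(16, 32, 2)
    seq = paddle.randn((4, 23, 16))
    prev_h2 = paddle.randn((2, 4, 32))
    prev_c2 = paddle.randn((2, 4, 32))
    out, (hn, cn) = lstm(seq, (prev_h2, prev_c2))
    print(out.shape)  # [4, 23, 32]
    print(hn.shape)   # [2, 4, 32]
    print(cn.shape)   # [2, 4, 32]

Printing the shapes this way mirrors the `print(...)` lines the patch adds to each docstring, so the documented outputs can be checked directly against a live run.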