未验证 提交 818de88c 编写于 作者: F Feiyu Chan 提交者: GitHub

fix multiple documentation errors, test=document_fix (#29210) (#29384)

* fix multiple documentation error, test=document_fix

* fix more rst syntax errors, test=document_fix

* fix format issues in docstring, test=document_fix
上级 b6bc4cb5
......@@ -27,9 +27,6 @@ from ...fluid.layers.layer_function_generator import templatedoc
def diag_embed(input, offset=0, dim1=-2, dim2=-1):
"""
:alias_main: paddle.nn.functional.diag_embed
:alias: paddle.nn.functional.diag_embed,paddle.nn.functional.extension.diag_embed
This OP creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2)
are filled by ``input``. By default, a 2D plane formed by the last two dimensions
of the returned tensor will be selected.
......@@ -41,60 +38,59 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1):
- If offset < 0, it is below the main diagonal.
Args:
input(Variable|numpy.ndarray): The input tensor. Must be at least 1-dimensional. The input data type should be float32, float64, int32, int64.
input(Tensor|numpy.ndarray): The input tensor. Must be at least 1-dimensional. The input data type should be float32, float64, int32, int64.
offset(int, optional): Which diagonal to consider. Default: 0 (main diagonal).
dim1(int, optional): The first dimension with respect to which to take diagonal. Default: -2.
dim2(int, optional): The second dimension with respect to which to take diagonal. Default: -1.
Returns:
Variable, the output data type is the same as input data type.
Tensor, the output data type is the same as input data type.
Examples:
.. code-block:: python
import paddle.nn.functional as F
import paddle.fluid.dygraph as dg
import numpy as np
diag_embed = np.random.randn(2, 3).astype('float32')
# [[ 0.7545889 , -0.25074545, 0.5929117 ],
# [-0.6097662 , -0.01753256, 0.619769 ]]
with dg.guard():
data1 = F.diag_embed(diag_embed)
data1.numpy()
# [[[ 0.7545889 , 0. , 0. ],
# [ 0. , -0.25074545, 0. ],
# [ 0. , 0. , 0.5929117 ]],
# [[-0.6097662 , 0. , 0. ],
# [ 0. , -0.01753256, 0. ],
# [ 0. , 0. , 0.619769 ]]]
data2 = F.diag_embed(diag_embed, offset=-1, dim1=0, dim2=2)
data2.numpy()
# [[[ 0. , 0. , 0. , 0. ],
# [ 0.7545889 , 0. , 0. , 0. ],
# [ 0. , -0.25074545, 0. , 0. ],
# [ 0. , 0. , 0.5929117 , 0. ]],
#
# [[ 0. , 0. , 0. , 0. ],
# [-0.6097662 , 0. , 0. , 0. ],
# [ 0. , -0.01753256, 0. , 0. ],
# [ 0. , 0. , 0.619769 , 0. ]]]
data3 = F.diag_embed(diag_embed, offset=1, dim1=0, dim2=2)
data3.numpy()
# [[[ 0. , 0.7545889 , 0. , 0. ],
# [ 0. , -0.6097662 , 0. , 0. ]],
#
# [[ 0. , 0. , -0.25074545, 0. ],
# [ 0. , 0. , -0.01753256, 0. ]],
#
# [[ 0. , 0. , 0. , 0.5929117 ],
# [ 0. , 0. , 0. , 0.619769 ]],
#
# [[ 0. , 0. , 0. , 0. ],
# [ 0. , 0. , 0. , 0. ]]]
data1 = F.diag_embed(diag_embed)
data1.numpy()
# [[[ 0.7545889 , 0. , 0. ],
# [ 0. , -0.25074545, 0. ],
# [ 0. , 0. , 0.5929117 ]],
# [[-0.6097662 , 0. , 0. ],
# [ 0. , -0.01753256, 0. ],
# [ 0. , 0. , 0.619769 ]]]
data2 = F.diag_embed(diag_embed, offset=-1, dim1=0, dim2=2)
data2.numpy()
# [[[ 0. , 0. , 0. , 0. ],
# [ 0.7545889 , 0. , 0. , 0. ],
# [ 0. , -0.25074545, 0. , 0. ],
# [ 0. , 0. , 0.5929117 , 0. ]],
#
# [[ 0. , 0. , 0. , 0. ],
# [-0.6097662 , 0. , 0. , 0. ],
# [ 0. , -0.01753256, 0. , 0. ],
# [ 0. , 0. , 0.619769 , 0. ]]]
data3 = F.diag_embed(diag_embed, offset=1, dim1=0, dim2=2)
data3.numpy()
# [[[ 0. , 0.7545889 , 0. , 0. ],
# [ 0. , -0.6097662 , 0. , 0. ]],
#
# [[ 0. , 0. , -0.25074545, 0. ],
# [ 0. , 0. , -0.01753256, 0. ]],
#
# [[ 0. , 0. , 0. , 0.5929117 ],
# [ 0. , 0. , 0. , 0.619769 ]],
#
# [[ 0. , 0. , 0. , 0. ],
# [ 0. , 0. , 0. , 0. ]]]
"""
inputs = {'Input': [input]}
attrs = {'offset': offset, 'dim1': dim1, 'dim2': dim2}
......@@ -151,15 +147,15 @@ def row_conv(input, weight, act=None):
${comment}
Args:
input (Variable): the input(X) is a LodTensor or tensor, LodTensor(X)
supports variable time-length input sequences. The underlying
input (Tensor): the input(X) is a LoDTensor or tensor, LoDTensor(X)
supports variable time-length input sequences. The underlying
tensor in this LoDTensor is a matrix with shape (T, D), where
T is the total time steps in this mini-batch and D is the input
data dimension.
If the input is a padded minibatch, the shape of the input is
(N, T, D), N is batch size, T is the max time steps in the batch,
D is the input data dimension.
weight (Variable): The weight. A Tensor with shape
weight (Tensor): The weight. A Tensor with shape
(future_context_size + 1, D), where future_context_size is the
context size of the RowConv operator.
act (str): Non-linear activation to be applied to output variable.
......@@ -171,7 +167,6 @@ def row_conv(input, weight, act=None):
.. code-block:: python
from paddle import fluid, nn
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F
import numpy as np
......@@ -182,16 +177,12 @@ def row_conv(input, weight, act=None):
x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)
weight = np.random.randn(context_size + 1, feature_size).astype(np.float32)
place = fluid.CPUPlace()
with dg.guard(place):
x_var = dg.to_variable(x)
w_var = dg.to_variable(weight)
y_var = F.extension.row_conv(x_var, w_var)
y_np = y_var.numpy()
print(y_np.shape)
x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(weight)
y_var = F.extension.row_conv(x_var, w_var)
print(y_var.shape)
# (4, 8, 6)
# [4, 8, 6]
"""
if in_dygraph_mode():
......
......@@ -20,9 +20,6 @@ from .. import functional as F
class RowConv(layers.Layer):
"""
:alias_main: paddle.nn.RowConv
:alias: paddle.nn.RowConv,paddle.nn.layer.RowConv,paddle.nn.layer.extension.RowConv
**Row-convolution operator**
The row convolution is called lookahead convolution. This operator was
......@@ -50,7 +47,7 @@ class RowConv(layers.Layer):
of convolution kernel is [future_context_size + 1, D].
param_attr (ParamAttr): Attributes of parameters, including
name, initializer etc. Default: None.
act (str): Non-linear activation to be applied to output variable. Default: None.
act (str): Non-linear activation to be applied to output tensor. Default: None.
dtype (str, optional): Data type, it can be "float32". Default: "float32".
Attributes:
......@@ -63,8 +60,7 @@ class RowConv(layers.Layer):
Examples:
.. code-block:: python
from paddle import fluid, nn
import paddle.fluid.dygraph as dg
from paddle import nn
import paddle.nn.functional as F
import numpy as np
......@@ -75,15 +71,12 @@ class RowConv(layers.Layer):
x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)
place = fluid.CPUPlace()
with dg.guard(place):
x_var = dg.to_variable(x)
conv = nn.RowConv(feature_size, context_size)
y_var = conv(x_var)
y_np = y_var.numpy()
print(y_np.shape)
x = paddle.to_tensor(x)
conv = nn.RowConv(feature_size, context_size)
y = conv(x)
print(y.shape)
# (4, 8, 6)
# [4, 8, 6]
"""
def __init__(self,
......
......@@ -273,12 +273,11 @@ class SimpleRNNCell(RNNCellBase):
The formula used is as follows:
.. math::
h_{t} & = act(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
h_{t} & = act(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})
y_{t} & = h_{t}
where :math:`act` is for :attr:`activation` , and * is the elemetwise
multiplication operator.
where :math:`act` is for :attr:`activation`.
Please refer to `Finding Structure in Time
<https://crl.ucsd.edu/~elman/Papers/fsit.pdf>`_ for more details.
......@@ -289,46 +288,32 @@ class SimpleRNNCell(RNNCellBase):
activation (str, optional): The activation in the SimpleRNN cell.
It can be `tanh` or `relu`. Defaults to `tanh`.
weight_ih_attr (ParamAttr, optional): The parameter attribute for
`weight_ih`. Default: None.
:math:`weight_ih`. Default: None.
weight_hh_attr(ParamAttr, optional): The parameter attribute for
`weight_hh`. Default: None.
:math:`weight_hh`. Default: None.
bias_ih_attr (ParamAttr, optional): The parameter attribute for the
`bias_ih`. Default: None.
:math:`bias_ih`. Default: None.
bias_hh_attr (ParamAttr, optional): The parameter attribute for the
`bias_hh`. Default: None.
:math:`bias_hh`. Default: None.
name (str, optional): Name for the operation (optional, default is
None). For more information, please refer to :ref:`api_guide_Name`.
Attributes:
weight_ih (Parameter): shape (hidden_size, input_size), input to hidden
weight, corresponding to :math:`W_{ih}` in the formula.
weight_hh (Parameter): shape (hidden_size, hidden_size), hidden to
hidden weight, corresponding to :math:`W_{hh}` in the formula.
bias_ih (Parameter): shape (hidden_size, ), input to hidden bias,
corresponding to :math:`b_{ih}` in the formula.
bias_hh (Parameter): shape (hidden_size, ), hidden to hidden bias,
corresponding to :math:`b_{hh}` in the formula.
Variables:
- **weight_ih** (Parameter): shape (hidden_size, input_size), input to hidden weight, corresponding to :math:`W_{ih}` in the formula.
- **weight_hh** (Parameter): shape (hidden_size, hidden_size), hidden to hidden weight, corresponding to :math:`W_{hh}` in the formula.
- **bias_ih** (Parameter): shape (hidden_size, ), input to hidden bias, corresponding to :math:`b_{ih}` in the formula.
- **bias_hh** (Parameter): shape (hidden_size, ), hidden to hidden bias, corresponding to :math:`b_{hh}` in the formula.
Inputs:
inputs (Tensor): shape `[batch_size, input_size]`, the input,
corresponding to :math:`x_t` in the formula.
states (Tensor, optional): shape `[batch_size, hidden_size]`, the
previous hidden state, corresponding to :math:`h_{t-1}` in the
formula. When states is None, zero state is used. Defaults to
None.
- **inputs** (Tensor): shape `[batch_size, input_size]`, the input, corresponding to :math:`x_{t}` in the formula.
- **states** (Tensor, optional): shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}` in the formula. When states is None, zero state is used. Defaults to None.
Returns:
(outputs, new_states)
outputs (Tensor): shape `[batch_size, hidden_size]`, the output,
corresponding to :math:`h_{t}` in the formula.
states (Tensor): shape `[batch_size, hidden_size]`, the new hidden
state, corresponding to :math:`h_{t}` in the formula.
- **outputs** (Tensor): shape `[batch_size, hidden_size]`, the output, corresponding to :math:`h_{t}` in the formula.
- **states** (Tensor): shape `[batch_size, hidden_size]`, the new hidden state, corresponding to :math:`h_{t}` in the formula.
Notes:
All the weights and bias are initialized with `Uniform(-std, std)` by
default. Where std = :math:`\frac{1}{\sqrt{hidden_size}}`. For more
information about parameter initialization, please refer to
:ref:`api_fluid_ParamAttr`.
All the weights and bias are initialized with `Uniform(-std, std)` by default. Where std = :math:`\frac{1}{\sqrt{hidden\_size}}`. For more information about parameter initialization, please refer to :ref:`api_fluid_ParamAttr`.
Examples:
......@@ -448,41 +433,24 @@ class LSTMCell(RNNCellBase):
name (str, optional): Name for the operation (optional, default is
None). For more information, please refer to :ref:`api_guide_Name`.
Attributes:
weight_ih (Parameter): shape (4 * hidden_size, input_size), input to
hidden weight, which corresponds to the concatenation of
:math:`W_{ii}, W_{if}, W_{ig}, W_{io}` in the formula.
weight_hh (Parameter): shape (4 * hidden_size, hidden_size), hidden to
hidden weight, which corresponds to the concatenation of
:math:`W_{hi}, W_{hf}, W_{hg}, W_{ho}` in the formula.
bias_ih (Parameter): shape (4 * hidden_size, ), input to hidden bias,
which corresponds to the concatenation of
:math:`b_{ii}, b_{if}, b_{ig}, b_{io}` in the formula.
bias_hh (Parameter): shape (4 * hidden_size, ), hidden to hidden bias,
which corresponds to the concatenation of
:math:`b_{hi}, b_{hf}, b_{hg}, b_{ho}` in the formula.
Variables:
- **weight_ih** (Parameter): shape (4 * hidden_size, input_size), input to hidden weight, which corresponds to the concatenation of :math:`W_{ii}, W_{if}, W_{ig}, W_{io}` in the formula.
- **weight_hh** (Parameter): shape (4 * hidden_size, hidden_size), hidden to hidden weight, which corresponds to the concatenation of :math:`W_{hi}, W_{hf}, W_{hg}, W_{ho}` in the formula.
- **bias_ih** (Parameter): shape (4 * hidden_size, ), input to hidden bias, which corresponds to the concatenation of :math:`b_{ii}, b_{if}, b_{ig}, b_{io}` in the formula.
- **bias_hh** (Parameter): shape (4 * hidden_size, ), hidden to hidden bias, which corresponds to the concatenation of :math:`b_{hi}, b_{hf}, b_{hg}, b_{ho}` in the formula.
Inputs:
inputs (Tensor): shape `[batch_size, input_size]`, the input,
corresponding to :math:`x_t` in the formula.
states (tuple, optional): a tuple of two tensors, each of shape
`[batch_size, hidden_size]`, the previous hidden state,
corresponding to :math:`h_{t-1}, c_{t-1}` in the formula.
When states is None, zero state is used. Defaults to None.
- **inputs** (Tensor): shape `[batch_size, input_size]`, the input, corresponding to :math:`x_t` in the formula.
- **states** (tuple, optional): a tuple of two tensors, each of shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}, c_{t-1}` in the formula. When states is None, zero state is used. Defaults to None.
Returns:
(outputs, new_states)
outputs (Tensor): shape `[batch_size, hidden_size]`, the output,
corresponding to :math:`h_{t}` in the formula.
states (tuple): a tuple of two tensors, each of shape
`[batch_size, hidden_size]`, the new hidden states,
corresponding to :math:`h_{t}, c_{t}` in the formula.
- **outputs** (Tensor): shape `[batch_size, hidden_size]`, the output, corresponding to :math:`h_{t}` in the formula.
- **states** (tuple): a tuple of two tensors, each of shape `[batch_size, hidden_size]`, the new hidden states, corresponding to :math:`h_{t}, c_{t}` in the formula.
Notes:
All the weights and bias are initialized with `Uniform(-std, std)` by
default. Where std = :math:`\frac{1}{\sqrt{hidden_size}}`. For more
information about parameter initialization, please refer to
:ref:`api_fluid_ParamAttr`.
default. Where std = :math:`\frac{1}{\sqrt{hidden\_size}}`. For more
information about parameter initialization, please refer to :ref:`api_fluid_ParamAttr`.
Examples:
......@@ -582,11 +550,11 @@ class GRUCell(RNNCellBase):
.. math::
r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}x_{t-1} + b_{hr})
r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}x_{t-1} + b_{hz})
z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})
\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}x_{t-1} + b_{hc}))
\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))
h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}
......@@ -599,7 +567,7 @@ class GRUCell(RNNCellBase):
<http://proceedings.mlr.press/v37/jozefowicz15.pdf>`_ for more details.
Parameters:
input_size (int): The input size..
input_size (int): The input size.
hidden_size (int): The hidden size.
weight_ih_attr(ParamAttr, optional): The parameter attribute for
`weight_ih`. Default: None.
......@@ -612,38 +580,24 @@ class GRUCell(RNNCellBase):
name (str, optional): Name for the operation (optional, default is
None). For more information, please refer to :ref:`api_guide_Name`.
Attributes:
weight_ih (Parameter): shape (3 * hidden_size, input_size), input to
hidden weight, which corresponds to the concatenation of
:math:`W_{ir}, W_{iz}, W_{ic}` in the formula.
weight_hh (Parameter): shape (3 * hidden_size, hidden_size), hidden to
hidden weight, which corresponds to the concatenation of
:math:`W_{hr}, W_{hz}, W_{hc}` in the formula.
bias_ih (Parameter): shape (3 * hidden_size, ), input to hidden bias,
which corresponds to the concatenation of
:math:`b_{ir}, b_{iz}, b_{ic}` in the formula.
bias_hh (Parameter): shape (3 * hidden_size, ), hidden to hidden bias,
which corresponds to the concatenation of
:math:`b_{hr}, b_{hz}, b_{hc}` in the formula.
Variables:
- **weight_ih** (Parameter): shape (3 * hidden_size, input_size), input to hidden weight, which corresponds to the concatenation of :math:`W_{ir}, W_{iz}, W_{ic}` in the formula.
- **weight_hh** (Parameter): shape (3 * hidden_size, hidden_size), hidden to hidden weight, which corresponds to the concatenation of :math:`W_{hr}, W_{hz}, W_{hc}` in the formula.
- **bias_ih** (Parameter): shape (3 * hidden_size, ), input to hidden bias, which corresponds to the concatenation of :math:`b_{ir}, b_{iz}, b_{ic}` in the formula.
- **bias_hh** (Parameter): shape (3 * hidden_size, ), hidden to hidden bias, which corresponds to the concatenation of :math:`b_{hr}, b_{hz}, b_{hc}` in the formula.
Inputs:
inputs (Tensor): A tensor with shape `[batch_size, input_size]`,
corresponding to :math:`x_t` in the formula.
states (Tensor): A tensor with shape `[batch_size, hidden_size]`.
corresponding to :math:`h_{t-1}` in the formula.
- **inputs** (Tensor): A tensor with shape `[batch_size, input_size]`, corresponding to :math:`x_t` in the formula.
- **states** (Tensor): A tensor with shape `[batch_size, hidden_size]`, corresponding to :math:`h_{t-1}` in the formula.
Returns:
(outputs, new_states)
outputs (Tensor): shape `[batch_size, hidden_size]`, the output,
corresponding to :math:`h_{t}` in the formula.
states (Tensor): shape `[batch_size, hidden_size]`, the new hidden
state, corresponding to :math:`h_{t}` in the formula.
- **outputs** (Tensor): shape `[batch_size, hidden_size]`, the output, corresponding to :math:`h_{t}` in the formula.
- **states** (Tensor): shape `[batch_size, hidden_size]`, the new hidden state, corresponding to :math:`h_{t}` in the formula.
Notes:
All the weights and bias are initialized with `Uniform(-std, std)` by
default. Where std = :math:`\frac{1}{\sqrt{hidden_size}}`. For more
information about parameter initialization, please refer to
:ref:`api_fluid_ParamAttr`.
default. Where std = :math:`\frac{1}{\sqrt{hidden\_size}}`. For more
information about parameter initialization, please refer to :ref:`api_fluid_ParamAttr`.
Examples:
......@@ -745,32 +699,14 @@ class RNN(Layer):
time steps. Defaults to False.
Inputs:
inputs (Tensor): A (possibly nested structure of) tensor[s]. The input
sequences.
If time major is False, the shape is `[batch_size, time_steps, input_size]`
If time major is True, the shape is `[time_steps, batch_size, input_size]`
where `input_size` is the input size of the cell.
initial_states (Tensor|list|tuple, optional): Tensor of a possibly
nested structure of tensors, representing the initial state for
the rnn cell. If not provided, `cell.get_initial_states` would be
called to produce the initial states. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index are not less than the valid length are treated as paddings.
**kwargs: Additional keyword arguments to pass to `forward` of the cell.
- **inputs** (Tensor): A (possibly nested structure of) tensor[s]. The input sequences. If time major is False, the shape is `[batch_size, time_steps, input_size]`. If time major is True, the shape is `[time_steps, batch_size, input_size]` where `input_size` is the input size of the cell.
- **initial_states** (Tensor|list|tuple, optional): Tensor of a possibly nested structure of tensors, representing the initial state for the rnn cell. If not provided, `cell.get_initial_states` would be called to produce the initial states. Defaults to None.
- **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whose time step index are not less than the valid length are treated as paddings.
- **kwargs**: Additional keyword arguments to pass to `forward` of the cell.
Returns:
(outputs, final_states)
outputs (Tensor|list|tuple): the output sequences.
If `time_major` is True, the shape is
`[time_steps, batch_size, hidden_size]`, else
`[batch_size, time_steps, hidden_size]`.
final_states (Tensor|list|tuple): final states of the cell. Tensor or
a possibly nested structure of tensors which has the same structure
with intial state. Each tensor in final states has the same shape
and dtype as the corresponding tensor in initial states.
- **outputs** (Tensor|list|tuple): the output sequences. If `time_major` is True, the shape is `[time_steps, batch_size, hidden_size]`, else `[batch_size, time_steps, hidden_size]`.
- **final_states** (Tensor|list|tuple): final states of the cell. Tensor or a possibly nested structure of tensors which has the same structure as the initial state. Each tensor in final states has the same shape and dtype as the corresponding tensor in initial states.
Notes:
This class is a low level API for wrapping rnn cell into a RNN network.
......@@ -838,33 +774,14 @@ class BiRNN(Layer):
time steps. Defaults to False.
Inputs:
inputs (Tensor): the input sequences of both RNN.
If time_major is True, the shape of is
`[time_steps, batch_size, input_size]`, else the shape is
`[batch_size, time_steps, input_size]`, where input_size is the
input size of both cells.
initial_states (list|tuple, optional): A tuple/list of the initial
states of the forward cell and backward cell. Defaults to None.
If not provided, `cell.get_initial_states` would be called to
produce the initial states for each cell. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index are not less than the valid length are treated as paddings.
**kwargs: Additional keyword arguments. Arguments passed to `forward`
for each cell.
- **inputs** (Tensor): the input sequences of both RNN. If time_major is True, the shape is `[time_steps, batch_size, input_size]`, else the shape is `[batch_size, time_steps, input_size]`, where input_size is the input size of both cells.
- **initial_states** (list|tuple, optional): A tuple/list of the initial states of the forward cell and backward cell. If not provided, `cell.get_initial_states` would be called to produce the initial states for each cell. Defaults to None.
- **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whose time step index are not less than the valid length are treated as paddings.
- **kwargs**: Additional keyword arguments. Arguments passed to `forward` for each cell.
Outputs:
(outputs, final_states)
outputs (Tensor): the outputs of the bidirectional RNN. It is the
concatenation of the outputs from the forward RNN and backward
RNN along the last axis.
If time major is True, the shape is `[time_steps, batch_size, size]`,
else the shape is `[batch_size, time_steps, size]`, where size is
`cell_fw.hidden_size + cell_bw.hidden_size`.
final_states (tuple): A tuple of the final states of the forward
cell and backward cell.
- **outputs** (Tensor): the outputs of the bidirectional RNN. It is the concatenation of the outputs from the forward RNN and backward RNN along the last axis. If time major is True, the shape is `[time_steps, batch_size, size]`, else the shape is `[batch_size, time_steps, size]`, where size is `cell_fw.hidden_size + cell_bw.hidden_size`.
- **final_states** (tuple): A tuple of the final states of the forward cell and backward cell.
Notes:
This class is a low level API for wrapping rnn cells into a BiRNN
......@@ -1150,12 +1067,11 @@ class SimpleRNN(RNNBase):
.. math::
h_{t} & = act(W_{ih}x_{t} + b_{ih} + W_{hh}h{t-1} + b_{hh})
h_{t} & = act(W_{ih}x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})
y_{t} & = h_{t}
where :math:`act` is for :attr:`activation` , and * is the elemetwise
multiplication operator.
where :math:`act` is for :attr:`activation`.
Using key word arguments to construct is recommended.
......@@ -1183,43 +1099,20 @@ class SimpleRNN(RNNBase):
name (str, optional): Name for the operation (optional, default is
None). For more information, please refer to :ref:`api_guide_Name`.
Inputs:
inputs (Tensor): the input sequence.
If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`,
else, the shape is `[batch_size, time_steps, hidden_size]`.
initial_states (Tensor, optional): the initial state. The shape is
`[num_layers * num_directions, batch_size, hidden_size]`.
If initial_state is not given, zero initial states are used.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index are not less than the valid length are treated as paddings.
Inputs:
- **inputs** (Tensor): the input sequence. If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`, else, the shape is `[batch_size, time_steps, input_size]`.
- **initial_states** (Tensor, optional): the initial state. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used.
- **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whose time step index are not less than the valid length are treated as paddings.
Returns:
(outputs, final_states)
outputs (Tensor): the output sequence.
If `time_major` is True, the shape is
`[time_steps, batch_size, num_directions * hidden_size]`,
else, the shape is
`[batch_size, time_steps, num_directions * hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
final_states (Tensor): final states. The shape is
`[num_layers * num_directions, batch_size, hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
Attributes:
weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise,
the shape is `[hidden_size, num_directions * hidden_size]`.
weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
with shape `[hidden_size, hidden_size]`.
bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
with shape `[hidden_size]`.
bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
with shape `[hidden_size]`.
- **outputs** (Tensor): the output sequence. If `time_major` is True, the shape is `[time_steps, batch_size, num_directions * hidden_size]`, else, the shape is `[batch_size, time_steps, num_directions * hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
- **final_states** (Tensor): final states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
Variables:
- **weight_ih_l[k]**: the learnable input-hidden weights of the k-th layer. If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise, the shape is `[hidden_size, num_directions * hidden_size]`.
- **weight_hh_l[k]**: the learnable hidden-hidden weights of the k-th layer, with shape `[hidden_size, hidden_size]`.
- **bias_ih_l[k]**: the learnable input-hidden bias of the k-th layer, with shape `[hidden_size]`.
- **bias_hh_l[k]**: the learnable hidden-hidden bias of the k-th layer, with shape `[hidden_size]`.
Examples:
......@@ -1321,43 +1214,19 @@ class LSTM(RNNBase):
None). For more information, please refer to :ref:`api_guide_Name`.
Inputs:
inputs (Tensor): the input sequence.
If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`,
else, the shape is `[batch_size, time_steps, hidden_size]`.
initial_states (tuple, optional): the initial state, a tuple of (h, c),
the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`.
If initial_state is not given, zero initial states are used.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whos time step
index are not less than the valid length are treated as paddings.
- **inputs** (Tensor): the input sequence. If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`, else, the shape is `[batch_size, time_steps, input_size]`.
- **initial_states** (tuple, optional): the initial state, a tuple of (h, c), the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used.
- **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whose time step index are not less than the valid length are treated as paddings.
Returns:
(outputs, final_states)
outputs (Tensor): the output sequence.
If `time_major` is True, the shape is
`[time_steps, batch_size, num_directions * hidden_size]`,
If `time_major` is False, the shape is
`[batch_size, time_steps, num_directions * hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
final_states (tuple): the final state, a tuple of two tensors, h and c.
The shape of each is
`[num_layers * num_directions, batch_size, hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
Attributes:
weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise,
the shape is `[hidden_size, num_directions * hidden_size]`.
weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
with shape `[hidden_size, hidden_size]`.
bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
with shape `[hidden_size]`.
bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
with shape `[hidden_size]`.
- **outputs** (Tensor): the output sequence. If `time_major` is True, the shape is `[time_steps, batch_size, num_directions * hidden_size]`. If `time_major` is False, the shape is `[batch_size, time_steps, num_directions * hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
- **final_states** (tuple): the final state, a tuple of two tensors, h and c. The shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
Variables:
- **weight_ih_l[k]**: the learnable input-hidden weights of the k-th layer. If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise, the shape is `[hidden_size, num_directions * hidden_size]`.
- **weight_hh_l[k]**: the learnable hidden-hidden weights of the k-th layer, with shape `[hidden_size, hidden_size]`.
- **bias_ih_l[k]**: the learnable input-hidden bias of the k-th layer, with shape `[hidden_size]`.
- **bias_hh_l[k]**: the learnable hidden-hidden bias of the k-th layer, with shape `[hidden_size]`.
Examples:
......@@ -1412,11 +1281,11 @@ class GRU(RNNBase):
.. math::
r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}x_{t-1} + b_{hr})
r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})
z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}x_{t-1} + b_{hz})
z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})
\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}x_{t-1} + b_{hc}))
\widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))
h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}
......@@ -1450,43 +1319,19 @@ class GRU(RNNBase):
None). For more information, please refer to :ref:`api_guide_Name`.
Inputs:
inputs (Tensor): the input sequence.
If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`,
else, the shape is `[batch_size, time_steps, hidden_size]`.
initial_states (Tensor, optional): the initial state. The shape is
`[num_layers * num_directions, batch_size, hidden_size]`.
If initial_state is not given, zero initial states are used.
Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whos time step
index are not less than the valid length are treated as paddings.
- **inputs** (Tensor): the input sequence. If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`, else, the shape is `[batch_size, time_steps, input_size]`.
- **initial_states** (Tensor, optional): the initial state. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used. Defaults to None.
- **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whose time step index are not less than the valid length are treated as paddings.
Returns:
(outputs, final_states)
outputs (Tensor): the output sequence.
If `time_major` is True, the shape is
`[time_steps, batch_size, num_directions * hidden_size]`,
else, the shape is
`[batch_size, time_steps, num_directions * hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
final_states (Tensor): final states. The shape is
`[num_layers * num_directions, batch_size, hidden_size]`.
Note that `num_directions` is 2 if direction is "bidirectional"
else 1.
Attributes:
weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise,
the shape is `[hidden_size, num_directions * hidden_size]`.
weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
with shape `[hidden_size, hidden_size]`.
bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
with shape `[hidden_size]`.
bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
with shape `[hidden_size]`.
- **outputs** (Tensor): the output sequence. If `time_major` is True, the shape is `[time_steps, batch_size, num_directions * hidden_size]`, else, the shape is `[batch_size, time_steps, num_directions * hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
- **final_states** (Tensor): final states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. Note that `num_directions` is 2 if direction is "bidirectional" else 1.
Variables:
- **weight_ih_l[k]**: the learnable input-hidden weights of the k-th layer. If `k = 0`, the shape is `[hidden_size, input_size]`. Otherwise, the shape is `[hidden_size, num_directions * hidden_size]`.
- **weight_hh_l[k]**: the learnable hidden-hidden weights of the k-th layer, with shape `[hidden_size, hidden_size]`.
- **bias_ih_l[k]**: the learnable input-hidden bias of the k-th layer, with shape `[hidden_size]`.
- **bias_hh_l[k]**: the learnable hidden-hidden bias of the k-th layer, with shape `[hidden_size]`.
Examples:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册