Unverified commit dcaf60f0 authored by lujun, committed by GitHub

Fix dygraph doc at nn.py for v1.5 (#17904)

fix doc: conv3d, layer norm, bilineartensorproduct, conv2dtranspose, conv3dtranspose, rowconv, groupnorm, spectralnorm, treeconv

hidden api: sequenceConv
Parent 3559e5a3
@@ -27,8 +27,7 @@ import numpy as np
__all__ = [
'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit',
'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose',
-    'Conv3DTranspose', 'SequenceConv', 'RowConv', 'GroupNorm', 'SpectralNorm',
-    'TreeConv'
+    'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv'
]
@@ -355,8 +354,16 @@ class Conv3D(layers.Layer):
Examples:
.. code-block:: python
-          data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
-          conv3d = fluid.layers.conv3d(input=data, num_filters=2, filter_size=3, act="relu")
+          import paddle.fluid as fluid
+          import numpy
+
+          with fluid.dygraph.guard():
+              data = numpy.random.random((5, 3, 12, 32, 32)).astype('float32')
+              conv3d = fluid.dygraph.nn.Conv3D(
+                  'Conv3D', num_filters=2, filter_size=3, act="relu")
+              ret = conv3d(fluid.dygraph.base.to_variable(data))
"""
def __init__(self,
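Note: as a quick sanity check on the new Conv3D example, the result shape can be computed with the standard convolution formula. A minimal sketch, assuming the layer's defaults of stride 1, zero padding, and dilation 1 (those values are not visible in this hunk):

.. code-block:: python

    # Standard convolution output-size formula; stride/padding/dilation
    # values are assumed defaults, not taken from this diff.
    def conv_out_dim(in_dim, filter_size, stride=1, padding=0, dilation=1):
        return (in_dim + 2 * padding - dilation * (filter_size - 1) - 1) // stride + 1

    # Example input (5, 3, 12, 32, 32) with num_filters=2, filter_size=3:
    spatial = [conv_out_dim(d, 3) for d in (12, 32, 32)]
    print([5, 2] + spatial)  # [5, 2, 10, 30, 30]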
@@ -555,12 +562,19 @@ class Conv3DTranspose(layers.Layer):
Examples:
.. code-block:: python
-          conv3d_transpose = nn.Conv3DTranspose(
+          import paddle.fluid as fluid
+          import numpy
+
+          with fluid.dygraph.guard():
+              data = numpy.random.random((5, 3, 12, 32, 32)).astype('float32')
+              conv3dTranspose = fluid.dygraph.nn.Conv3DTranspose(
                   'Conv3DTranspose',
                   num_filters=12,
                   filter_size=12,
                   use_cudnn=False)
-          transpose_res = conv3d_transpose(base.to_variable(input_array))
+              ret = conv3dTranspose(fluid.dygraph.base.to_variable(data))
"""
def __init__(self,
@@ -1257,7 +1271,13 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer):
"""
-    ${comment}
+    Assume feature vectors exist on dimensions
+    `begin_norm_axis ... rank(input)` and calculate the moment statistics
+    along these dimensions for each feature vector `a` with size `H`, then
+    normalize each feature vector using the corresponding statistics. After
+    that, apply learnable gain and bias on the normalized tensor to scale
+    and shift, if `scale` and `shift` are set.
+
+    Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
The formula is as follows:
@@ -1279,7 +1299,7 @@ class LayerNorm(layers.Layer):
* :math:`b`: the trainable bias parameter.
Args:
-        input(Variable): The input tensor variable.
+        name_scope (str): See base class.
scale(bool): Whether to learn the adaptive gain :math:`g` after
normalization. Default True.
shift(bool): Whether to learn the adaptive bias :math:`b` after
@@ -1302,13 +1322,21 @@ class LayerNorm(layers.Layer):
act(str): Activation to be applied to the output of layer normalization.
Default None.
Returns:
-        ${y_comment}
+        Result after normalization
Examples:
-        >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
-        >>>                          dtype='float32')
-        >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
+        .. code-block:: python
+
+          import paddle.fluid as fluid
+          import numpy
+
+          with fluid.dygraph.guard():
+              x = numpy.random.random((3, 32, 32)).astype('float32')
+              layerNorm = fluid.dygraph.nn.LayerNorm(
+                  'LayerNorm', begin_norm_axis=1)
+              ret = layerNorm(fluid.dygraph.base.to_variable(x))
"""
def __init__(self,
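The normalization described in this docstring is straightforward to reproduce outside the framework. Below is a minimal numpy sketch of the same computation, an illustration of the formula rather than the layer's actual implementation; the learnable `scale` and `shift` are left at their identity values:

.. code-block:: python

    import numpy

    def layer_norm_ref(x, begin_norm_axis=1, epsilon=1e-5):
        # Flatten dims [begin_norm_axis, rank) into one feature vector of size H.
        left = int(numpy.prod(x.shape[:begin_norm_axis]))
        h = int(numpy.prod(x.shape[begin_norm_axis:]))
        flat = x.reshape(left, h)
        mu = flat.mean(axis=1, keepdims=True)          # per-vector mean
        var = flat.var(axis=1, keepdims=True)          # per-vector variance
        out = (flat - mu) / numpy.sqrt(var + epsilon)  # normalize
        return out.reshape(x.shape)

    x = numpy.random.random((3, 32, 32)).astype('float32')
    print(layer_norm_ref(x).shape)  # (3, 32, 32)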
@@ -1837,8 +1865,7 @@ class BilinearTensorProduct(layers.Layer):
- :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`.
Args:
-        x (Variable): 2-D input tensor with shape [batch_size, M]
-        y (Variable): 2-D input tensor with shape [batch_size, N]
+        name_scope (str): See base class.
size (int): The dimension of this layer.
act (str, default None): Activation to be applied to the output of this layer.
name (str, default None): The name of this layer.
@@ -1854,7 +1881,16 @@ class BilinearTensorProduct(layers.Layer):
Examples:
.. code-block:: python
-          tensor = bilinear_tensor_product(x=layer1, y=layer2, size=1000)
+          import paddle.fluid as fluid
+          import numpy
+
+          with fluid.dygraph.guard():
+              layer1 = numpy.random.random((5, 5)).astype('float32')
+              layer2 = numpy.random.random((5, 4)).astype('float32')
+              bilinearTensorProduct = fluid.dygraph.nn.BilinearTensorProduct(
+                  'BilinearTensorProduct', size=1000)
+              ret = bilinearTensorProduct(fluid.dygraph.base.to_variable(layer1),
+                                          fluid.dygraph.base.to_variable(layer2))
"""
def __init__(self,
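For reference, the bilinear tensor product computes one quadratic form per output dimension: out[b, i] = x[b]^T W_i y[b]. A minimal numpy sketch, assuming a weight tensor of shape [size, M, N] and omitting the bias term (a smaller `size` than the example, for brevity):

.. code-block:: python

    import numpy

    def bilinear_tensor_product_ref(x, y, w):
        # x: [batch, M], y: [batch, N], w: [size, M, N]
        # out[b, i] = x[b] @ w[i] @ y[b]
        return numpy.einsum('bm,imn,bn->bi', x, w, y)

    x = numpy.random.random((5, 5)).astype('float32')      # [batch, M]
    y = numpy.random.random((5, 4)).astype('float32')      # [batch, N]
    w = numpy.random.random((10, 5, 4)).astype('float32')  # [size, M, N]
    print(bilinear_tensor_product_ref(x, y, w).shape)      # (5, 10)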
@@ -1964,7 +2000,7 @@ class Conv2DTranspose(layers.Layer):
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Args:
-        input(Variable): The input image with [N, C, H, W] format.
+        name_scope (str): See base class.
num_filters(int): The number of the filter. It is as same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
@@ -2017,8 +2053,15 @@ class Conv2DTranspose(layers.Layer):
Examples:
.. code-block:: python
-          data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
-          conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
+          import paddle.fluid as fluid
+          import numpy
+
+          with fluid.dygraph.guard():
+              data = numpy.random.random((3, 32, 32)).astype('float32')
+              conv2DTranspose = fluid.dygraph.nn.Conv2DTranspose(
+                  'Conv2DTranspose', num_filters=2, filter_size=3)
+              ret = conv2DTranspose(fluid.dygraph.base.to_variable(data))
"""
def __init__(self,
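The H_out/W_out bounds quoted in this docstring can be checked numerically. A sketch, assuming default stride 1, zero padding, and dilation 1 (with stride 1 the interval collapses to a single size):

.. code-block:: python

    # Lower bound of the transposed-convolution output-size range above;
    # stride/padding/dilation are assumed defaults, not taken from this diff.
    def conv_transpose_out_dim(in_dim, filter_size, stride=1, padding=0, dilation=1):
        return (in_dim - 1) * stride - 2 * padding + dilation * (filter_size - 1) + 1

    # Example input H=W=32 with filter_size=3 -> 34x34, num_filters=2 channels.
    print(conv_transpose_out_dim(32, 3))  # 34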
@@ -2130,7 +2173,7 @@ class SequenceConv(layers.Layer):
in the input parameters to the function.
Args:
-        input (Variable): ${x_comment}
+        name_scope (str): See base class.
num_filters (int): number of filters.
filter_size (int): the filter size (H and W).
filter_stride (int): stride of the filter.
@@ -2197,6 +2240,49 @@ class SequenceConv(layers.Layer):
class RowConv(layers.Layer):
"""
***Row-convolution operator***
The row convolution is called lookahead convolution. This operator was introduced in the following paper for DeepSpeech2:
http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf

The main motivation is that a bidirectional RNN, useful in DeepSpeech-like speech models, learns the representation for a sequence by performing a forward and a backward pass through the entire sequence. However, unlike unidirectional RNNs, bidirectional RNNs are challenging to deploy in an online and low-latency setting. The lookahead convolution incorporates information from future subsequences in a computationally efficient manner to improve unidirectional recurrent neural networks. The row convolution operator is different from the 1D sequence convolution, and is computed as follows:

Given an input sequence X of length t and input dimension D, and a filter (W) of size context * D, the output sequence is convolved as:

.. math::
    Out_{i} = \sum_{j=i}^{i + context - 1} X_{j} \cdot W_{j-i}

For more details about row_conv, please refer to the design document: https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
Args:
name_scope (str): See base class.
future_context_size (int): Future context size. Please note, the shape
of convolution kernel is [future_context_size + 1, D].
param_attr (ParamAttr): Attributes of parameters, including
name, initializer etc.
act (str): Non-linear activation to be applied to output variable.
Returns:
The output (Out) is a LoDTensor, which supports variable time-length input sequences. The underlying tensor in this LoDTensor is a matrix with shape T x N, i.e., the same shape as X.
Examples:
.. code-block:: python
          import paddle.fluid as fluid
          import numpy

          with fluid.dygraph.guard():
              x = numpy.random.random((16)).astype('float32')
              rowConv = fluid.dygraph.nn.RowConv(
                  'RowConv', future_context_size=2)
              ret = rowConv(fluid.dygraph.base.to_variable(x))
"""
def __init__(self,
name_scope,
future_context_size,
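A minimal numpy sketch of the lookahead convolution formula above, for a single sequence; the kernel shape [future_context_size + 1, D] matches the note in Args, and zero padding past the end of the sequence is assumed:

.. code-block:: python

    import numpy

    def row_conv_ref(x, w):
        # x: [T, D] sequence, w: [context, D], context = future_context_size + 1
        # out[t, d] = sum_{j=0}^{context-1} x[t + j, d] * w[j, d], zeros past the end
        t, d = x.shape
        context = w.shape[0]
        padded = numpy.vstack([x, numpy.zeros((context - 1, d), dtype=x.dtype)])
        return sum(padded[j:j + t] * w[j] for j in range(context))

    x = numpy.random.random((16, 8)).astype('float32')  # T=16 steps, D=8 features
    w = numpy.random.random((3, 8)).astype('float32')   # future_context_size = 2
    print(row_conv_ref(x, w).shape)  # (16, 8)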
@@ -2252,6 +2338,16 @@ class GroupNorm(layers.Layer):
Returns:
Variable: A tensor variable which is the result after applying group normalization on the input.
Examples:
.. code-block:: python
          import paddle.fluid as fluid
          import numpy

          with fluid.dygraph.guard():
              x = numpy.random.random((8, 32, 32)).astype('float32')
              groupNorm = fluid.dygraph.nn.GroupNorm('GroupNorm', groups=4)
              ret = groupNorm(fluid.dygraph.base.to_variable(x))
"""
@@ -2319,6 +2415,63 @@ class GroupNorm(layers.Layer):
class SpectralNorm(layers.Layer):
"""
**Spectral Normalization Layer**
This layer calculates the spectral normalization value of the weight parameters of
fc, conv1d, conv2d and conv3d layers, which should be 2-D, 3-D, 4-D and 5-D
tensors respectively. The calculation is as follows.
Step 1:
Generate vector U in shape of [H], and V in shape of [W],
where H is the :attr:`dim`-th dimension of the input weights
and W is the product of the remaining dimensions.
Step 2:
:attr:`power_iters` should be a positive integer. Perform the following
calculations with U and V for :attr:`power_iters` rounds.
.. math::
\mathbf{v} := \\frac{\mathbf{W}^{T} \mathbf{u}}{\|\mathbf{W}^{T} \mathbf{u}\|_2}

\mathbf{u} := \\frac{\mathbf{W} \mathbf{v}}{\|\mathbf{W} \mathbf{v}\|_2}
Step 3:
Calculate :math:`\sigma(\mathbf{W})` and normalize weight values.
.. math::
\sigma(\mathbf{W}) = \mathbf{u}^{T} \mathbf{W} \mathbf{v}
\mathbf{W} = \\frac{\mathbf{W}}{\sigma(\mathbf{W})}
Refer to `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ .
Args:
name_scope (str): See base class.
dim(int): The index of the dimension which should be permuted to the first before reshaping Input(Weight) to a matrix. It should be set to 0 if Input(Weight) is the weight of an fc layer, and to 1 if Input(Weight) is the weight of a conv layer. Default 0
power_iters(int): number of power iterations to calculate spectral norm, default 1
eps(float): epsilon for numerical stability in calculating norms
name (str): The name of this layer. It is optional.
Returns:
Variable: A tensor variable of weight parameters after spectral normalization.
Examples:
.. code-block:: python
          import paddle.fluid as fluid
          import numpy

          with fluid.dygraph.guard():
              x = numpy.random.random((2, 8, 32, 32)).astype('float32')
              spectralNorm = fluid.dygraph.nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
              ret = spectralNorm(fluid.dygraph.base.to_variable(x))
"""
def __init__(self, name_scope, dim=0, power_iters=1, eps=1e-12, name=None):
super(SpectralNorm, self).__init__(name_scope)
self._power_iters = power_iters
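Steps 1-3 above map directly onto a few lines of numpy. A sketch, assuming the weight has already been permuted and reshaped to the H x W matrix described in Step 1:

.. code-block:: python

    import numpy

    def spectral_norm_ref(w, power_iters=2, eps=1e-12):
        # w: [H, W] matrix (already permuted/reshaped as in Step 1).
        h, width = w.shape
        u = numpy.random.normal(size=h)      # Step 1: U in shape [H]
        v = numpy.random.normal(size=width)  # ...and V in shape [W]
        for _ in range(power_iters):         # Step 2: power iteration
            v = w.T @ u
            v /= numpy.linalg.norm(v) + eps
            u = w @ v
            u /= numpy.linalg.norm(u) + eps
        sigma = u @ w @ v                    # Step 3: sigma(W) = u^T W v
        return w / sigma

    w = numpy.random.random((8, 2048)).astype('float32')
    print(numpy.linalg.norm(spectral_norm_ref(w), ord=2))  # close to 1.0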
@@ -2362,6 +2515,44 @@ class SpectralNorm(layers.Layer):
class TreeConv(layers.Layer):
"""
***Tree-Based Convolution Operator***
Tree-Based Convolution is a kind of convolution based on tree structure.
Tree-Based Convolution is a part of the Tree-Based Convolutional Neural Network (TBCNN),
which is used to classify tree structures, such as abstract syntax trees.
Tree-Based Convolution introduces a data structure called the continuous binary tree,
which regards a multiway tree as a binary tree.
The paper on the Tree-Based Convolution operator is available at: https://arxiv.org/abs/1409.5718v1
Args:
name_scope (str): See base class.
output_size(int): output feature width
num_filters(int): number of filters, Default 1
max_depth(int): max depth of filters, Default 2
act(str): activation function, Default tanh
param_attr(ParamAttr): the parameter attribute for the filters, Default None
bias_attr(ParamAttr): the parameter attribute for the bias of this layer, Default None
name(str): a name of this layer(optional). If set None, the layer will be named automatically, Default None
Returns:
out(Variable): (Tensor) The feature vector of subtrees. The shape of the output tensor is [max_tree_node_size, output_size, num_filters]. The output tensor can be used as a new feature vector for subsequent tree convolution layers
Examples:
.. code-block:: python
          import paddle.fluid as fluid
          import numpy

          with fluid.dygraph.guard():
              nodes_vector = numpy.random.random((1, 10, 5)).astype('float32')
              edge_set = numpy.random.random((1, 9, 2)).astype('int32')
              treeConv = fluid.dygraph.nn.TreeConv(
                  'TreeConv', output_size=6, num_filters=1, max_depth=2)
              ret = treeConv(fluid.dygraph.base.to_variable(nodes_vector),
                             fluid.dygraph.base.to_variable(edge_set))
"""
def __init__(self,
name_scope,
output_size,
@@ -2400,6 +2591,7 @@ class TreeConv(layers.Layer):
is_bias=False)
def forward(self, nodes_vector, edge_set):
if self._name:
out = self.create_variable(
name=self._name, dtype=self._dtype, persistable=False)