未验证 提交 4ae7ea0a 编写于 作者: L lujun 提交者: GitHub

cherry pick, fix dygraph api doc, test=release/1.5

BackwardStrategy
dygraph.nn
dygraph.checkpoint
上级 3cd78f6e
...@@ -731,8 +731,8 @@ paddle.fluid.dygraph.Tracer.train_mode (ArgSpec(args=['self'], varargs=None, key ...@@ -731,8 +731,8 @@ paddle.fluid.dygraph.Tracer.train_mode (ArgSpec(args=['self'], varargs=None, key
paddle.fluid.dygraph.start_gperf_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.start_gperf_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.stop_gperf_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.stop_gperf_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.prepare_context (ArgSpec(args=['strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.prepare_context (ArgSpec(args=['strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.save_persistables (ArgSpec(args=['model_dict', 'dirname', 'optimizers'], varargs=None, keywords=None, defaults=('save_dir', None)), ('document', 'bdeefe733228f5f2d4a8f8c61a5956cf')) paddle.fluid.dygraph.save_persistables (ArgSpec(args=['model_dict', 'dirname', 'optimizers'], varargs=None, keywords=None, defaults=('save_dir', None)), ('document', '7f526f879139a14cda8e0b5a9171f264'))
paddle.fluid.dygraph.load_persistables (ArgSpec(args=['dirname'], varargs=None, keywords=None, defaults=('save_dir',)), ('document', 'fb79b050b5eb52fa9c5fdccefe521aa1')) paddle.fluid.dygraph.load_persistables (ArgSpec(args=['dirname'], varargs=None, keywords=None, defaults=('save_dir',)), ('document', '2574d50a7a9f89fb0d74ddf73d8128f0'))
paddle.fluid.dygraph.NoamDecay.__init__ (ArgSpec(args=['self', 'd_model', 'warmup_steps', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(1, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NoamDecay.__init__ (ArgSpec(args=['self', 'd_model', 'warmup_steps', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(1, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.NoamDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.NoamDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866'))
paddle.fluid.dygraph.NoamDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NoamDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
...@@ -162,22 +162,23 @@ void BindImperative(pybind11::module *m_ptr) { ...@@ -162,22 +162,23 @@ void BindImperative(pybind11::module *m_ptr) {
1. :code:`sort_sum_gradient`, which will sum the gradient by the reverse order of trace. 1. :code:`sort_sum_gradient`, which will sum the gradient by the reverse order of trace.
Examples: Examples:
.. code-block:: python
.. code-block:: python
import numpy as np
import paddle.fluid as fluid import numpy as np
from paddle.fluid import FC import paddle.fluid as fluid
from paddle.fluid import FC
x = np.ones([2, 2], np.float32)
with fluid.dygraph.guard(): x = np.ones([2, 2], np.float32)
inputs2 = [] with fluid.dygraph.guard():
for _ in range(10): inputs2 = []
inputs2.append(fluid.dygraph.base.to_variable(x)) for _ in range(10):
ret2 = fluid.layers.sums(inputs2) inputs2.append(fluid.dygraph.base.to_variable(x))
loss2 = fluid.layers.reduce_sum(ret2) ret2 = fluid.layers.sums(inputs2)
backward_strategy = fluid.dygraph.BackwardStrategy() loss2 = fluid.layers.reduce_sum(ret2)
backward_strategy.sort_sum_gradient = True backward_strategy = fluid.dygraph.BackwardStrategy()
loss2.backward(backward_strategy) backward_strategy.sort_sum_gradient = True
loss2.backward(backward_strategy)
)DOC"); )DOC");
backward_strategy.def(py::init()) backward_strategy.def(py::init())
.def_property("sort_sum_gradient", .def_property("sort_sum_gradient",
......
...@@ -43,35 +43,38 @@ def save_persistables(model_dict, dirname='save_dir', optimizers=None): ...@@ -43,35 +43,38 @@ def save_persistables(model_dict, dirname='save_dir', optimizers=None):
optimizers(fluid.Optimizer|list(fluid.Optimizer)|None): The optimizers to be saved optimizers(fluid.Optimizer|list(fluid.Optimizer)|None): The optimizers to be saved
Returns: Returns:
None
Examples: Examples:
.. code-block:: python .. code-block:: python
ptb_model = PtbModel(
ptb_model = PtbModel(
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
num_steps=num_steps, num_steps=num_steps,
init_scale=init_scale) init_scale=init_scale)
sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd = fluid.optimizer.SGD(learning_rate=0.01)
x_data = np.arange(12).reshape(4, 3).astype('int64') x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, num_steps, 1)) x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, 1)) y_data = y_data.reshape((-1, 1))
init_hidden_data = np.zeros( init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32') (num_layers, batch_size, hidden_size), dtype='float32')
init_cell_data = np.zeros( init_cell_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32') (num_layers, batch_size, hidden_size), dtype='float32')
x = to_variable(x_data) x = to_variable(x_data)
y = to_variable(y_data) y = to_variable(y_data)
init_hidden = to_variable(init_hidden_data) init_hidden = to_variable(init_hidden_data)
init_cell = to_variable(init_cell_data) init_cell = to_variable(init_cell_data)
dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
init_cell) init_cell)
dy_loss.backward() dy_loss.backward()
sgd.minimize(dy_loss) sgd.minimize(dy_loss)
ptb_model.clear_gradient() ptb_model.clear_gradient()
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
fluid.dygraph.save_persistables(ptb_model.state_dict(), dirname=param_path, optimizers=sgd) fluid.dygraph.save_persistables(ptb_model.state_dict(), dirname=param_path, optimizers=sgd)
""" """
if isinstance(model_dict, collections.OrderedDict): if isinstance(model_dict, collections.OrderedDict):
_save_var_to_file(model_dict, optimizers, dirname, None) _save_var_to_file(model_dict, optimizers, dirname, None)
...@@ -95,13 +98,15 @@ def load_persistables(dirname='save_dir'): ...@@ -95,13 +98,15 @@ def load_persistables(dirname='save_dir'):
optimizer dict: The optimizer optimizer dict: The optimizer
Examples: Examples:
.. code-block:: python
my_layer = layer(fluid.Layer) .. code-block:: python
param_path = "./my_paddle_model"
sgd = SGDOptimizer(learning_rate=1e-3) my_layer = layer(fluid.Layer)
param_dict, optimizer_dict = fluid.dygraph.load_persistables(param_path) param_path = "./my_paddle_model"
param_1 = param_dict['PtbModel_0.w_1'] sgd = SGDOptimizer(learning_rate=1e-3)
sgd.load(optimizer_dict) param_dict, optimizer_dict = fluid.dygraph.load_persistables(param_path)
param_1 = param_dict['PtbModel_0.w_1']
sgd.load(optimizer_dict)
""" """
return _load_var_from_file(dirname) return _load_var_from_file(dirname)
......
...@@ -302,9 +302,8 @@ class Conv3D(layers.Layer): ...@@ -302,9 +302,8 @@ class Conv3D(layers.Layer):
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
Args: Args:
input (Variable): The input image with [N, C, D, H, W] format. name_scope(str) : The name for this class.
num_filters(int): The number of filters. It is the same as the output num_filters(int): The number of filters. It is the same as the output image channel.
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple, filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square. Otherwise, the filter will be a square.
...@@ -336,8 +335,6 @@ class Conv3D(layers.Layer): ...@@ -336,8 +335,6 @@ class Conv3D(layers.Layer):
library is installed. Default: True library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended. act (str): Activation type, if it is set to None, activation is not appended.
Default: None. Default: None.
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
Returns: Returns:
Variable: The tensor variable storing the convolution and \ Variable: The tensor variable storing the convolution and \
...@@ -506,7 +503,7 @@ class Conv3DTranspose(layers.Layer): ...@@ -506,7 +503,7 @@ class Conv3DTranspose(layers.Layer):
W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
Args: Args:
input(Variable): The input image with [N, C, D, H, W] format. name_scope(str) : The name for this class.
num_filters(int): The number of filters. It is the same as the output num_filters(int): The number of filters. It is the same as the output
image channel. image channel.
output_size(int|tuple|None): The output image size. If output size is a output_size(int|tuple|None): The output image size. If output size is a
...@@ -687,21 +684,20 @@ class Pool2D(layers.Layer): ...@@ -687,21 +684,20 @@ class Pool2D(layers.Layer):
name_scope(str) : The name of this class. name_scope(str) : The name of this class.
pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width). it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int. Otherwise, the pool kernel size will be a square of an int. Default: -1
pool_type: (string), pooling type, can be "max" for max-pooling and "avg" for average-pooling pool_type(str) : The pooling type, can be "max" for max-pooling and "avg" for average-pooling. Default: max
pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width). it must contain two integers, (pool_stride_Height, pool_stride_Width). Otherwise,
Otherwise, the pool stride size will be a square of an int. the pool stride size will be a square of an int. Default: 1
pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple, pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple,
it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width). it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width).
Otherwise, the pool padding size will be a square of an int. Otherwise, the pool padding size will be a square of an int. Default: 0
global_pooling (bool): (bool, default false) Whether to use the global pooling. If global_pooling = true, global_pooling (bool): Whether to use the global pooling. If global_pooling = true,
kernel size and paddings will be ignored kernel size and paddings will be ignored. Default: False
use_cudnn (bool): (bool, default True) Onlyceil_mode (bool) - (bool, default false) Whether to use the ceil use_cudnn (bool): Only used in cudnn kernel, need install cudnn. Default: True
function to calculate output height and width. False is the default. ceil_mode (bool): Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. False is the default. If it is set to False, the floor function will be used. Default: False
exclusive (bool): Whether to exclude padding points in average pooling exclusive (bool): Whether to exclude padding points in average pooling mode. Default: True
mode, default is true
Returns: Returns:
Variable: The pooling result. Variable: The pooling result.
...@@ -844,7 +840,7 @@ class FC(layers.Layer): ...@@ -844,7 +840,7 @@ class FC(layers.Layer):
Args: Args:
name_scope(str): The name of this class. name_scope(str): The name of this class.
size(int): The number of output units in this layer. size(int): The number of output units in this layer.
num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than num_flatten_dims (int): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multidimensional tensor will first be flattened two dimensions. If this happens, the multidimensional tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
...@@ -852,14 +848,14 @@ class FC(layers.Layer): ...@@ -852,14 +848,14 @@ class FC(layers.Layer):
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable param_attr (ParamAttr|list of ParamAttr|None): The parameter attribute for learnable
parameters/weights of this layer. parameters/weights of this layer.
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units. of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None. If it is set to None, the bias is initialized zero. Default: None.
act (str, default None): Activation to be applied to the output of this layer. act (str|None): Activation to be applied to the output of this layer.
is_test(bool): A flag indicating whether execution is in test phase. is_test(bool): A flag indicating whether execution is in test phase. Default: False
dtype(str): Dtype used for weight dtype(str): Dtype used for weight
Raises: Raises:
...@@ -1019,15 +1015,15 @@ class BatchNorm(layers.Layer): ...@@ -1019,15 +1015,15 @@ class BatchNorm(layers.Layer):
Args: Args:
name_scope(str): The name of this class. name_scope(str): The name of this class.
act(string, Default None): Activation type, linear|relu|prelu|... act(str|None): Activation type, linear|relu|prelu|...
is_test (bool, Default False): A flag indicating whether it is in is_test (bool): A flag indicating whether it is in
test phase or not. test phase or not. Default: False
momentum(float, Default 0.9): The value used for the moving_mean and momentum(float): The value used for the moving_mean and
moving_var computation. The updated formula is: moving_var computation. The updated formula is:
:math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
:math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
Default is 0.9. Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for epsilon(float): A value added to the denominator for
numerical stability. Default is 1e-5. numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale` param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
...@@ -1037,19 +1033,19 @@ class BatchNorm(layers.Layer): ...@@ -1037,19 +1033,19 @@ class BatchNorm(layers.Layer):
If it is set to None or one attribute of ParamAttr, batch_norm If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None. is not set, the bias is initialized zero. Default: None.
data_layout(string, default NCHW): NCHW|NHWC data_layout(string): NCHW|NHWC. Default: NCHW
in_place(bool, Default False): Make the input and output of batch norm reuse memory. in_place(bool): Make the input and output of batch norm reuse memory. Default: False
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. moving_mean_name(string|None): The name of moving_mean which store the global Mean. Default: None
moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance.
do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
fuse_with_relu (bool): if True, this OP performs relu after batch norm. fuse_with_relu (bool): if True, this OP performs relu after batch norm. Default: False
use_global_stats(bool, Default False): Whether to use global mean and use_global_stats(bool): Whether to use global mean and
variance. In inference or test mode, set use_global_stats to true variance. In inference or test mode, set use_global_stats to true
or is_test to true, and the behavior is equivalent. or is_test to true, and the behavior is equivalent.
In train mode, when setting use_global_stats True, the global mean In train mode, when setting use_global_stats True, the global mean
and variance are also used during train period. and variance are also used during train period. Default: False
trainable_statistics(bool, Default False): Whether to calculate mean and var in eval mode. In eval mode, when trainable_statistics(bool): Whether to calculate mean and var in eval mode. In eval mode, when
setting trainable_statistics True, mean and variance will be calculated by current batch statistics. setting trainable_statistics True, mean and variance will be calculated by current batch statistics. Default: False
Returns: Returns:
Variable: A tensor variable which is the result after applying batch normalization on the input. Variable: A tensor variable which is the result after applying batch normalization on the input.
...@@ -1057,10 +1053,13 @@ class BatchNorm(layers.Layer): ...@@ -1057,10 +1053,13 @@ class BatchNorm(layers.Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
fc = fluid.FC('fc', size=200, param_attr='fc1.w') import paddle.fluid as fluid
hidden1 = fc(x)
batch_norm = fluid.BatchNorm("batch_norm", 10) with fluid.dygraph.guard():
hidden2 = batch_norm(hidden1) fc = fluid.FC('fc', size=200, param_attr='fc1.w')
hidden1 = fc(x)
batch_norm = fluid.BatchNorm("batch_norm", 10)
hidden2 = batch_norm(hidden1)
""" """
def __init__(self, def __init__(self,
...@@ -1197,16 +1196,16 @@ class Embedding(layers.Layer): ...@@ -1197,16 +1196,16 @@ class Embedding(layers.Layer):
All the input variables are passed in as local variables to the LayerHelper constructor All the input variables are passed in as local variables to the LayerHelper constructor
Args: Args:
name_scope: See base class. name_scope(str): The name of this class.
size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
of the dictionary of embeddings and the size of each embedding vector respectively. of the dictionary of embeddings and the size of each embedding vector respectively.
is_sparse(bool): The flag indicating whether to use sparse update. is_sparse(bool): The flag indicating whether to use sparse update. Default: False
is_distributed(bool): Whether to run lookup table from remote parameter server. is_distributed(bool): Whether to run lookup table from remote parameter server. Default: False.
padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters
it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`. it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`. Default: None.
param_attr(ParamAttr): Parameters for this layer param_attr(ParamAttr): Parameters for this layer. Default: None.
dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc. Default: 'float32'.
Returns: Returns:
Variable: The tensor variable storing the embeddings of the \ Variable: The tensor variable storing the embeddings of the \
...@@ -1305,28 +1304,28 @@ class LayerNorm(layers.Layer): ...@@ -1305,28 +1304,28 @@ class LayerNorm(layers.Layer):
* :math:`b`: the trainable bias parameter. * :math:`b`: the trainable bias parameter.
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
scale(bool): Whether to learn the adaptive gain :math:`g` after scale(bool): Whether to learn the adaptive gain :math:`g` after
normalization. Default True. normalization. Default: True.
shift(bool): Whether to learn the adaptive bias :math:`b` after shift(bool): Whether to learn the adaptive bias :math:`b` after
normalization. Default True. normalization. Default: True.
begin_norm_axis(int): The normalization will be performed along begin_norm_axis(int): The normalization will be performed along
dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
Default 1. Default: 1.
epsilon(float): The small value added to the variance to prevent epsilon(float): The small value added to the variance to prevent
division by zero. Default 1e-05. division by zero. Default: 1e-05.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
omitted. If :attr:`scale` is True and :attr:`param_attr` is None, omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as scale. The a default :code:`ParamAttr` would be added as scale. The
:attr:`param_attr` is initialized as 1 if it is added. Default None. :attr:`param_attr` is initialized as 1 if it is added. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
omitted. If :attr:`shift` is True and :attr:`param_attr` is None, omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as bias. The a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default None. :attr:`bias_attr` is initialized as 0 if it is added. Default: None.
act(str): Activation to be applied to the output of layer normalization. act(str): Activation to be applied to the output of layer normalization.
Default None. Default: None.
Returns: Returns:
Result after normalization Result after normalization
...@@ -1420,7 +1419,7 @@ class GRUUnit(layers.Layer): ...@@ -1420,7 +1419,7 @@ class GRUUnit(layers.Layer):
if origin_mode is True, then the equation of a gru step is from paper if origin_mode is True, then the equation of a gru step is from paper
`Learning Phrase Representations using RNN Encoder-Decoder for Statistical `Learning Phrase Representations using RNN Encoder-Decoder for Statistical
Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_ Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_
.. math:: .. math::
u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u) u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u)
...@@ -1458,10 +1457,8 @@ class GRUUnit(layers.Layer): ...@@ -1458,10 +1457,8 @@ class GRUUnit(layers.Layer):
and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`. and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
Args: Args:
input (Variable): The fc transformed input value of current step. name_scope(str): The name of this class.
name_scope (str): See base class. size (int): The input dimension value.
hidden (Variable): The hidden value of gru unit from previous step.
size (integer): The input dimension value.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weight matrix. Note: hidden-hidden weight matrix. Note:
...@@ -1483,11 +1480,11 @@ class GRUUnit(layers.Layer): ...@@ -1483,11 +1480,11 @@ class GRUUnit(layers.Layer):
attribute of ParamAttr, gru_unit will create ParamAttr as attribute of ParamAttr, gru_unit will create ParamAttr as
bias_attr. If the Initializer of the bias_attr is not set, the bias bias_attr. If the Initializer of the bias_attr is not set, the bias
is initialized zero. Default: None. is initialized zero. Default: None.
activation (string): The activation type for cell (actNode). activation (str): The activation type for cell (actNode).
Default: 'tanh' Default: 'tanh'
gate_activation (string): The activation type for gates (actGate). gate_activation (str): The activation type for gates (actGate).
Default: 'sigmoid' Default: 'sigmoid'
dtype(string): The dtype of the layers dtype(str): The dtype of the layers. Default: 'float32'
Returns: Returns:
tuple: The hidden value, reset-hidden value and gate values. tuple: The hidden value, reset-hidden value and gate values.
...@@ -1578,7 +1575,7 @@ class NCE(layers.Layer): ...@@ -1578,7 +1575,7 @@ class NCE(layers.Layer):
By default this operator uses a uniform distribution for sampling. By default this operator uses a uniform distribution for sampling.
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
num_total_classes (int): Total number of classes in all samples num_total_classes (int): Total number of classes in all samples
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of nce. If it is set to None or one attribute of ParamAttr, nce of nce. If it is set to None or one attribute of ParamAttr, nce
...@@ -1593,12 +1590,12 @@ class NCE(layers.Layer): ...@@ -1593,12 +1590,12 @@ class NCE(layers.Layer):
sampler (str): The sampler used to sample class from negative classes. sampler (str): The sampler used to sample class from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'. It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'. default: 'uniform'.
custom_dist (float[]): A float[] with size=num_total_classes. custom_dist (float[]|None): A float[] with size=num_total_classes.
It is used when sampler is set to 'custom_dist'. It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probability of i-th class to be sampled. custom_dist[i] is the probability of i-th class to be sampled.
default: None. Default: None.
seed (int): The seed used in sampler. default: 0. seed (int): The seed used in sampler. Default: 0.
is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows. is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default: False.
Returns: Returns:
Variable: The output nce loss. Variable: The output nce loss.
...@@ -1807,8 +1804,8 @@ class PRelu(layers.Layer): ...@@ -1807,8 +1804,8 @@ class PRelu(layers.Layer):
y = \max(0, x) + \\alpha * \min(0, x) y = \max(0, x) + \\alpha * \min(0, x)
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
mode (string): The mode for weight sharing. It supports all, channel mode (str): The mode for weight sharing. It supports all, channel
and element. all: all elements share same weight and element. all: all elements share same weight
channel:elements in a channel share same weight channel:elements in a channel share same weight
element:each element has a weight element:each element has a weight
...@@ -1888,13 +1885,13 @@ class BilinearTensorProduct(layers.Layer): ...@@ -1888,13 +1885,13 @@ class BilinearTensorProduct(layers.Layer):
- :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`. - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`.
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
size (int): The dimension of this layer. size (int): The dimension of this layer.
act (str, default None): Activation to be applied to the output of this layer. act (str): Activation to be applied to the output of this layer. Default: None.
name (str, default None): The name of this layer. name (str): The name of this layer. Default: None.
param_attr (ParamAttr, default None): The parameter attribute for the learnable w. param_attr (ParamAttr): The parameter attribute for the learnable w.
parameters/weights of this layer. parameters/weights of this layer. Default: None.
bias_attr (ParamAttr, default None): The parameter attribute for the bias bias_attr (ParamAttr): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units. of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None. If it is set to None, the bias is initialized zero. Default: None.
...@@ -2023,18 +2020,18 @@ class Conv2DTranspose(layers.Layer): ...@@ -2023,18 +2020,18 @@ class Conv2DTranspose(layers.Layer):
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ) W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
num_filters(int): The number of the filter. It is the same as the output num_filters(int): The number of the filter. It is the same as the output
image channel. image channel.
output_size(int|tuple|None): The output image size. If output size is a output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). None if use tuple, it must contain two integers, (image_H, image_W). None if use
filter_size, padding, and stride to calculate output_size. filter_size, padding, and stride to calculate output_size.
If output_size and filter_size are specified at the same time, they If output_size and filter_size are specified at the same time, they
should follow the formula above. should follow the formula above. Default: None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple, filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W). it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to Otherwise, the filter will be a square. None if use output size to
calculate filter_size. calculate filter_size. Default: None.
padding(int|tuple): The padding size. If padding is a tuple, it must padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0. padding_H = padding_W = padding. Default: padding = 0.
...@@ -2063,8 +2060,6 @@ class Conv2DTranspose(layers.Layer): ...@@ -2063,8 +2060,6 @@ class Conv2DTranspose(layers.Layer):
library is installed. Default: True. library is installed. Default: True.
act (str): Activation type, if it is set to None, activation is not appended. act (str): Activation type, if it is set to None, activation is not appended.
Default: None. Default: None.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: True.
Returns: Returns:
Variable: The tensor variable storing the convolution transpose result. Variable: The tensor variable storing the convolution transpose result.
...@@ -2196,11 +2191,11 @@ class SequenceConv(layers.Layer): ...@@ -2196,11 +2191,11 @@ class SequenceConv(layers.Layer):
in the input parameters to the function. in the input parameters to the function.
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
num_filters (int): number of filters. num_filters (int): number of filters.
filter_size (int): the filter size (H and W). filter_size (int): the filter size (H and W). Default: 3.
filter_stride (int): stride of the filter. filter_stride (int): stride of the filter. Default: 1.
padding (bool): if True, add paddings. padding (bool|None): if True, add paddings. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv.
If it is set to False, no bias will be added to the output units. If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, sequence_conv If it is set to None or one attribute of ParamAttr, sequence_conv
...@@ -2212,8 +2207,6 @@ class SequenceConv(layers.Layer): ...@@ -2212,8 +2207,6 @@ class SequenceConv(layers.Layer):
is not set, the parameter is initialized with Xavier. Default: None. is not set, the parameter is initialized with Xavier. Default: None.
act (str): Activation type, if it is set to None, activation is not appended. act (str): Activation type, if it is set to None, activation is not appended.
Default: None. Default: None.
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
Returns: Returns:
Variable: output of sequence_conv Variable: output of sequence_conv
...@@ -2282,15 +2275,16 @@ class RowConv(layers.Layer): ...@@ -2282,15 +2275,16 @@ class RowConv(layers.Layer):
More details about row_conv please refer to the design document https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 . More details about row_conv please refer to the design document https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
future_context_size (int): Future context size. Please note, the shape future_context_size (int): Future context size. Please note, the shape
of convolution kernel is [future_context_size + 1, D]. of convolution kernel is [future_context_size + 1, D].
param_attr (ParamAttr): Attributes of parameters, including param_attr (ParamAttr): Attributes of parameters, including
name, initializer etc. name, initializer etc. Default: None.
act (str): Non-linear activation to be applied to output variable. act (str): Non-linear activation to be applied to output variable. Default: None.
Returns: Returns:
the output(Out) is a LodTensor, which supports variable time-length input sequences. The underlying tensor in this LodTensor is a matrix with shape T x N, i.e., the same shape as X. the output(Out) is a LodTensor, which supports variable time-length input sequences.
The underlying tensor in this LodTensor is a matrix with shape T x N, i.e., the same shape as X.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -2344,10 +2338,10 @@ class GroupNorm(layers.Layer): ...@@ -2344,10 +2338,10 @@ class GroupNorm(layers.Layer):
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ . Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
groups(int): The number of groups that divided from channels. groups(int): The number of groups that divided from channels.
epsilon(float): The small value added to the variance to prevent epsilon(float): The small value added to the variance to prevent
division by zero. division by zero. Default: 1e-05.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
scale :math:`g`. If it is set to False, no scale will be added to the output units. scale :math:`g`. If it is set to False, no scale will be added to the output units.
If it is set to None, the scale is initialized to one. Default: None. If it is set to None, the scale is initialized to one. Default: None.
...@@ -2472,10 +2466,10 @@ class SpectralNorm(layers.Layer): ...@@ -2472,10 +2466,10 @@ class SpectralNorm(layers.Layer):
Refer to `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ . Refer to `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ .
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
dim(int): The index of dimension which should be permuted to the first before reshaping Input(Weight) to matrix, it should be set as 0 if Input(Weight) is the weight of fc layer, and should be set as 1 if Input(Weight) is the weight of conv layer, default 0 dim(int): The index of dimension which should be permuted to the first before reshaping Input(Weight) to matrix, it should be set as 0 if Input(Weight) is the weight of fc layer, and should be set as 1 if Input(Weight) is the weight of conv layer. Default: 0.
power_iters(int): number of power iterations to calculate spectral norm, default 1 power_iters(int): The number of power iterations to calculate spectral norm. Default: 1.
eps(float): epsilon for numerical stability in calculating norms eps(float): The epsilon for numerical stability in calculating norms. Default: 1e-12.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
Returns: Returns:
...@@ -2549,14 +2543,14 @@ class TreeConv(layers.Layer): ...@@ -2549,14 +2543,14 @@ class TreeConv(layers.Layer):
Args: Args:
name_scope (str): See base class. name_scope(str): The name of this class.
output_size(int): output feature width output_size(int): output feature width
num_filters(int): number of filters, Default 1 num_filters(int): number of filters, Default: 1.
max_depth(int): max depth of filters, Default 2 max_depth(int): max depth of filters, Default: 2.
act(str): activation function, Default tanh act(str): activation function, Default: tanh.
param_attr(ParamAttr): the parameter attribute for the filters, Default None param_attr(ParamAttr): the parameter attribute for the filters, Default: None.
bias_attr(ParamAttr): the parameter attribute for the bias of this layer, Default None bias_attr(ParamAttr): the parameter attribute for the bias of this layer, Default: None.
name(str): a name of this layer(optional). If set None, the layer will be named automatically, Default None name(str): a name of this layer(optional). If set None, the layer will be named automatically, Default: None.
Returns: Returns:
out(Variable): (Tensor) The feature vector of subtrees. The shape of the output tensor is [max_tree_node_size, output_size, num_filters]. The output tensor could be a new feature vector for next tree convolution layers. out(Variable): (Tensor) The feature vector of subtrees. The shape of the output tensor is [max_tree_node_size, output_size, num_filters]. The output tensor could be a new feature vector for next tree convolution layers.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册