Unverified Commit 20f0878f authored by: Z Zeng Jinle, committed by: GitHub

Fix en docs of apis (#20050)

* fix en docs of apis, test=develop, test=document_fix

* follow chunwei's comments, test=develop
Parent 2b7aca25
......@@ -48,8 +48,8 @@ paddle.fluid.ParallelExecutor ('paddle.fluid.parallel_executor.ParallelExecutor'
paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '77c739744ea5708b80fb1b37cc89db40'))
paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '0af092676e5b1320bb4232396154ce4b'))
-paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
-paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
+paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '0627369b86ff974f433f7078d1e78349'))
+paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '4829bd8c4a4f1b19438500def321cb65'))
paddle.fluid.DataFeedDesc ('paddle.fluid.data_feed_desc.DataFeedDesc', ('document', '43877a0d9357db94d3dbc7359cbe8c73'))
paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '9c6615854b61caa5f0d3e6ccc5e51338'))
......@@ -133,7 +133,7 @@ paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_at
paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae'))
paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90'))
paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66'))
-paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', 'f43c659ca1749a3f0ff2231e6dfda07d'))
+paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08'))
paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4'))
paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9'))
paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c'))
......@@ -978,7 +978,7 @@ paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss',
paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
-paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', '3f1c5385519a3674c18c3a1ab34ac04f'))
+paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', 'e132700f81e9c5d27a7b3cd32b38d714'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
......@@ -1062,7 +1062,7 @@ paddle.fluid.CUDAPlace ('paddle.fluid.core_avx.CUDAPlace', ('document', '6a6cd8e
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace ('paddle.fluid.core_avx.CUDAPinnedPlace', ('document', 'afd58ea5d390b5ea06ca70291a266d45'))
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None
-paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'cd667b4ee96d7d6fca40aa722d67d744'))
+paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'a4d4d13ce9eeb86bbaa7ab935c207577'))
paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'b5ae1698ea72d5a9428000b916a67379'))
paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......@@ -1071,7 +1071,7 @@ paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'p
paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a'))
paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc'))
paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f'))
-paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '77ca02bb37b70d226510df9cf5e45965'))
+paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'a0b00ccc8584b4a1cf4ec5aa74780e77'))
paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209'))
paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7'))
......
......@@ -342,13 +342,13 @@ def set_gradient_clip(clip, param_list=None, program=None):
    To specify parameters that require gradient clip.

    Args:
-        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+        clip (BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
            for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` ,
            which describes the type and detailed attributes of the required gradient clip.
-        param_list(list(Variable), optional): Parameters that require gradient clip.
+        param_list (list(Variable), optional): Parameters that require gradient clip.
            It can be a list of parameters or a list of parameter names.
            Default None, meaning that all parameters in the program will be included.
-        program(Program, optional): The program where parameters are located.
+        program (Program, optional): The program where parameters are located.
            Default None, meaning that :ref:`api_fluid_default_main_program` is used.

    Returns:
......
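For reference, here is a minimal usage sketch of the documented signature. The toy regression network and the clip_norm value of 5.0 are illustrative assumptions, not part of this diff; only set_gradient_clip and GradientClipByGlobalNorm come from the documented API.

.. code-block:: python

    import paddle.fluid as fluid

    # A toy network whose parameter gradients we want to clip.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    predict = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=predict, label=y))

    # Clip the global norm of all parameter gradients to 5.0. param_list and
    # program are left as None, so all parameters of the default main program apply.
    fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))

    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)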
......@@ -1680,76 +1680,53 @@ def dropout(x,
def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
"""
-    **Cross Entropy Layer**
-
-    This layer computes the cross entropy between `input` and `label`. It
-    supports both standard cross-entropy and soft-label cross-entropy loss
-    computation.
+    This operator computes the cross entropy between input and label. It
+    supports both hard-label and soft-label cross entropy computation.

-    1) One-hot cross-entropy:
-        `soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
+    1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]`
+       is the hard label of each sample.

    .. math::

-        Y[i] = -\log(X[i, Label[i]])
+        output[i_1, i_2, ..., i_k] = -log(input[i_1, i_2, ..., i_k, j]),  label[i_1, i_2, ..., i_k] = j,  j != ignore\_index

-    2) Soft-label cross-entropy:
-        `soft_label = True`, `Label[i, j]` indicates the soft label of class j
-        for sample i:
+    2. Soft-label cross entropy: if soft_label=True, :math:`label[i_1, i_2, ..., i_k, j]`
+       is the soft label of each sample corresponding to the j-th class.

    .. math::

-        Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
+        output[i_1, i_2, ..., i_k] = -\sum_{j} label[i_1, i_2, ..., i_k, j] * log(input[i_1, i_2, ..., i_k, j])

-    Please make sure that in this case the summation of each row of `label`
-    equals one.
-
-    3) One-hot cross-entropy with vecterized `label`:
-        As a special case of 2), when each row of 'label' has only one
-        non-zero element which is equal to 1, soft-label cross-entropy degenerates
-        to a one-hot cross-entropy with one-hot label representation.

    Args:
-        input (Variable|list): a 2-D tensor with shape [N x D], where N is the
-            batch size and D is the number of classes. This input is a
-            probability computed by the previous operator, which is almost
-            always the result of a softmax operator.
-        label (Variable|list): the ground truth which is a 2-D tensor. When
-            `soft_label` is set to `False`, `label` is a tensor<int64> with
-            shape [N x 1]. When `soft_label` is set to `True`, `label` is a
-            tensor<float/double> with shape [N x D].
-        soft_label (bool): a flag indicating whether to interpretate the given
-            labels as soft labels. Default: `False`.
-        ignore_index (int): Specifies a target value that is ignored and does
-            not contribute to the input gradient. Only valid if soft_label is
-            set to False. Default: kIgnoreIndex
+        input (Variable): a multidimensional Tensor with shape
+            :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
+            the class number. The data type should be float32 or float64.
+        label (Variable): the label value corresponding to input. If
+            soft_label=False, the shape of label should be :math:`[N_1, N_2, ..., N_k]`
+            or :math:`[N_1, N_2, ..., N_k, 1]`, its data type should be int64,
+            and the value must be inside [0, D). If soft_label=True, the shape
+            and data type of label should be the same as input, and the sum of
+            the soft label values of each sample should be 1.
+        soft_label (bool): indicates whether the label is soft. Default False,
+            meaning that the label is hard. If soft_label=True, the label is soft.
+        ignore_index (int): specifies an ignorable label value. The ignored label
+            would be omitted when computing. If it is a negative integer, no label
+            would be ignored. Only valid when soft_label=False. Default -100.

    Returns:
-        A 2-D tensor with shape [N x 1], the cross entropy loss.
-
-    Raises:
-        ValueError:
-            1. the 1st dimension of ``input`` and ``label`` are not equal.
-            2. when ``soft_label == True``, and the 2nd dimension of
-               ``input`` and ``label`` are not equal.
-            3. when ``soft_label == False``, and the 2nd dimension of
-               ``label`` is not 1.
+        A Variable holding a Tensor representing the cross entropy, whose data
+        type is the same as input.
+        If soft_label=False, the shape of the output is the same as label.
+        If soft_label=True, the shape of the output is :math:`[N_1, N_2, ..., N_k, 1]`.

    Examples:
        .. code-block:: python

-            import paddle.fluid as fluid
-            classdim = 7
-            x = fluid.layers.data(name='x', shape=[3, 7], dtype='float32', append_batch_size=False)
-            label = fluid.layers.data(name='label', shape=[3, 1], dtype='float32', append_batch_size=False)
-            predict = fluid.layers.fc(input=x, size=classdim, act='softmax')
-            cost = fluid.layers.cross_entropy(input=predict, label=label)
+            import paddle.fluid as fluid
+            class_num = 7
+            x = fluid.layers.data(name='x', shape=[3, 10], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
+            cost = fluid.layers.cross_entropy(input=predict, label=label)
"""
if not soft_label:
return cross_entropy2(input, label, ignore_index)
......
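The docstring example covers the hard-label case; the soft-label path can be exercised the same way. A minimal sketch, assuming a label tensor that already holds per-class probabilities summing to 1:

.. code-block:: python

    import paddle.fluid as fluid

    class_num = 7
    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    # For soft_label=True, the label has the same shape and dtype as the input,
    # and each row should sum to 1.
    label = fluid.layers.data(name='label', shape=[class_num], dtype='float32')
    predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label, soft_label=True)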
......@@ -23,51 +23,51 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def create_lod_tensor(data, recursive_seq_lens, place):
"""
-    Create a lod tensor from a numpy array, a list, or an existing lod tensor.
+    Create a LoDTensor from a numpy array, a list, or an existing LoDTensor.

-    Create a lod tensor by doing the following:
+    The implementation is as follows:

-    1. Check that the length-based level of detail (LoD) also known as
-       recursive_sequence_lengths of the input is valid.
+    1. Check whether the length-based LoD, i.e., :code:`recursive_seq_lens`,
+       is valid.

-    2. Convert recursive_sequence_lengths to a offset-based LoD.
+    2. Convert :code:`recursive_seq_lens` to an offset-based LoD.

-    3. Copy the data from a numpy array, a list or a existing lod tensor to
-       CPU or GPU device (based on input place).
+    3. Based on :code:`place`, copy the :code:`data` from a numpy array, list
+       or existing LoDTensor to CPU or GPU device.

-    4. Set the level of detail (LoD) using the offset-based LoD.
+    4. Set the offset-based LoD on the output LoDTensor.

-    Examples:
-        Suppose we want LoDTensor to hold data for sequences of word, where each
-        word is represented by an integer. If we want to create a LoDTensor to
-        represent two sentences, one of 2 words, and one of 3 words.
+    Suppose we want to create a LoDTensor to hold data for word sequences,
+    where each word is represented by an integer, and we want the LoDTensor
+    to represent two sentences, one of 2 words and one of 3 words.

-        Then :code:`data` can be a numpy array of integers with shape (5, 1).
-        :code:`recursive_seq_lens` will be [[2, 3]], indicating the length (# of words) in each
-        sentence. This length-based :code:`recursive_seq_lens` [[2, 3]] will be converted to
-        offset-based LoD [[0, 2, 5]] inside the function call.
+    Then :code:`data` would be a numpy array of integers with shape (5, 1).
+    :code:`recursive_seq_lens` would be [[2, 3]], indicating the number of words
+    in each sentence. This length-based :code:`recursive_seq_lens` [[2, 3]]
+    would be converted to the offset-based LoD [[0, 2, 5]] inside the function
+    call.

-        Please reference :ref:`api_guide_low_level_lod_tensor` for more details
-        regarding LoD.
+    Please reference :ref:`user_guide_lod_tensor` for more details regarding LoD.

    Args:
-        data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
-            list holding the data to be copied.
-        recursive_seq_lens(list): a list of lists indicating the length-based level of detail
-            info specified by the user.
-        place(Place): CPU or GPU place indicating where the data in the new
-            LoDTensor will be stored.
+        data (numpy.ndarray|list|LoDTensor): a numpy array, a list or a LoDTensor
+            holding the data to be copied.
+        recursive_seq_lens (list[list[int]]): a list of lists indicating the
+            length-based LoD info.
+        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where the data
+            in the created LoDTensor will be stored.

    Returns:
-        A fluid LoDTensor object with tensor data and recursive_seq_lens info.
+        A LoDTensor with tensor data and recursive_seq_lens info.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
"""
if isinstance(data, core.LoDTensor):
return create_lod_tensor(np.array(data), recursive_seq_lens, place)
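For reference, the two-sentence example described in the docstring can be sketched end to end as follows; the integer word ids are arbitrary assumptions:

.. code-block:: python

    import paddle.fluid as fluid
    import numpy as np

    # Five words (2 + 3) across two sentences, one integer id per word.
    data = np.arange(5).reshape(5, 1).astype('int64')
    t = fluid.create_lod_tensor(data, [[2, 3]], fluid.CPUPlace())
    # The length-based LoD [[2, 3]] is kept as recursive sequence lengths and
    # stored internally as the offset-based LoD [[0, 2, 5]].
    print(t.recursive_sequence_lengths())  # [[2, 3]]
    print(np.array(t).shape)               # (5, 1)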
......@@ -116,47 +116,47 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
"""
    Create a LoDTensor containing random integers.

-    This function is frequently used in the book examples. So we revised it
-    based on the new create_lod_tensor API and put it here in the lod_tensor
-    module to simplify the code.
-
-    The function does the following:
-
-    1. Calculate the overall shape of the LoDTensor based on the length-based
-       :code:`recursive_seq_lens` input and the shape of the basic element in
-       :code:`base_shape`.
-
-    2. Create a numpy array of this shape.
-
-    3. Create the LoDTensor using the create_lod_tensor API.
+    The implementation is as follows:
+
+    1. Obtain the shape of the output LoDTensor based on :code:`recursive_seq_lens`
+       and :code:`base_shape`. The first dimension of the shape is the total
+       length of the sequences, while the other dimensions are the same as
+       :code:`base_shape`.
+
+    2. Create a numpy array of random integers, and pass the created numpy
+       array as the parameter :code:`data` of :ref:`api_fluid_create_lod_tensor`
+       to create the output LoDTensor.

-    Suppose we want LoDTensor to hold data for sequences of word, where each
-    word is represented by an integer. If we want to create a LoDTensor to
-    represent two sentences, one of 2 words, and one of 3 words. Then
-    'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]].
-    Then the overall shape of the LoDTensor would be [5, 1], holding 5 words
-    for two sentences.
+    Suppose we want to create a LoDTensor to hold data for 2 sequences, where
+    the dimensions of the sequences are [2, 30] and [3, 30] respectively.
+    The :code:`recursive_seq_lens` would be [[2, 3]], and :code:`base_shape`
+    would be [30] (the other dimensions excluding the sequence length).
+    Therefore, the shape of the output LoDTensor would be [5, 30], where
+    the first dimension 5 is the total length of the sequences, and the
+    other dimensions are :code:`base_shape`.

    Args:
-        recursive_seq_lens(list): a list of lists indicating the length-based
-            level of detail info specified by the user.
-        base_shape(list): the shape of the basic element to be held by the
-            LoDTensor.
-        place(Place): CPU or GPU place indicating where the data in the new
-            LoDTensor will be stored.
-        low(int): the lower bound of the random integers.
-        high(int): the upper bound of the random integers.
+        recursive_seq_lens (list[list[int]]): a list of lists indicating the
+            length-based LoD info.
+        base_shape (list[int]): the shape of the output LoDTensor excluding
+            the first dimension.
+        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where
+            the data in the created LoDTensor will be stored.
+        low (int): the lower bound of the random integers.
+        high (int): the upper bound of the random integers.

    Returns:
-        A fluid LoDTensor object with tensor data and recursive_seq_lens info.
+        A LoDTensor with tensor data and recursive_seq_lens info, whose data
+        is inside [low, high].

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
                    base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
+            print(t.shape()) # [5, 30]
"""
assert isinstance(base_shape, list), "base_shape should be a list"
# append the total number of basic elements to the front of its shape
......
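To spell out the shape rule stated above, here is a small check of the created tensor; the bounds low=0 and high=10 follow the docstring example:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]], base_shape=[30],
                                          place=fluid.CPUPlace(), low=0, high=10)
    arr = np.array(t)
    # The first dimension is the total sequence length 2 + 3 = 5;
    # the remaining dimensions come from base_shape.
    print(arr.shape)  # (5, 30)
    # All values lie inside [low, high].
    print(arr.min() >= 0 and arr.max() <= 10)  # True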
......@@ -1782,26 +1782,29 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
"""
-    **NOTES: This API does not support sparse parameter optimization.**
+    **Notes: This API does not support sparse parameter optimization.**

    Adadelta Optimizer. Please refer to this for details:
-    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
-    <https://arxiv.org/abs/1212.5701>`_.
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

+    The update is done as follows:

    .. math::

-        E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2\\
-        learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) }\\
-        E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g*learning\_rate)^2
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2
+
+        learning\\_rate &= \\sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }
+
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2

    Args:
-        learning_rate(float|Variable): global learning rate.
-        epsilon(float): a small float number for numeric stability. Default 1.0e-6.
-        rho(float): a floating point value indicating the decay rate.
-        regularization(WeightDecayRegularizer, optional): A Regularizer, such as fluid.regularizer.L2DecayRegularizer. Default None, meaning that there is no regularization.
-        name(str, optional): A optional name prefix for debugging. Default None.
+        learning_rate (float|Variable): the global learning rate.
+        epsilon (float): a small float number for numeric stability. Default 1.0e-6.
+        rho (float): a floating point value indicating the decay rate. Default 0.95.
+        regularization (WeightDecayRegularizer, optional): A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer. Default None, meaning that there
+            is no regularization.
+        name (str, optional): An optional name prefix for debugging. Default None.
Examples:
.. code-block:: python
......
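The Examples section is truncated in this diff; a minimal end-to-end sketch of the optimizer, assuming a toy regression network, could look like this:

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    predict = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=predict, label=y))

    # epsilon and rho keep their documented defaults (1.0e-6 and 0.95).
    optimizer = fluid.optimizer.AdadeltaOptimizer(learning_rate=0.01)
    optimizer.minimize(loss)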
......@@ -27,23 +27,26 @@ __all__ = [
class ParamAttr(object):
"""
-    Parameter attributes object. To fine-tuning network training process, user
-    can set parameter's attributes to control training details. Such as learning rate,
-    regularization, trainable, do_model_average and the method to initialize param.
+    Create an object to represent the attributes of a parameter. The attributes are:
+    name, initializer, learning rate, regularizer, trainable, gradient clip,
+    and model average.

-    Args:
-        name(str): The parameter's name. Default None.
-        initializer(Initializer): The method to initial this parameter. Default None.
-        learning_rate(float): The parameter's learning rate. The learning rate when
-            optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
-            Default 1.0.
-        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
-        trainable(bool): Whether this parameter is trainable. Default True.
-        gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
-            gradient. Default None.
-        do_model_average(bool): Whether this parameter should do model average
-            when model average is enabled. Default True.
+    Parameters:
+        name (str, optional): The parameter's name. Default None, meaning that the
+            name would be created automatically.
+        initializer (Initializer, optional): The method to initialize this parameter.
+            Default None, meaning that the weight parameter is initialized by the
+            Xavier initializer, and the bias parameter is initialized to 0.
+        learning_rate (float): The parameter's learning rate. The effective learning
+            rate when optimizing is the global learning rate times the parameter's
+            learning rate times the factor of the learning rate scheduler. Default 1.0.
+        regularizer (WeightDecayRegularizer, optional): Regularization factor.
+            Default None, meaning there is no regularization.
+        trainable (bool): Whether this parameter is trainable. Default True.
+        gradient_clip (BaseGradientClipAttr, optional): The method to clip this
+            parameter's gradient. Default None, meaning that there is no gradient clip.
+        do_model_average (bool): Whether this parameter should do model average
+            when model average is enabled. Default False.
Examples:
.. code-block:: python
......@@ -54,6 +57,7 @@ class ParamAttr(object):
learning_rate=0.5,
regularizer=fluid.regularizer.L2Decay(1.0),
trainable=True)
+print(w_param_attrs.name) # "fc_weight"
x = fluid.layers.data(name='X', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
"""
......
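Reassembled from the diff context above, the complete docstring example reads as the following runnable sketch (the surrounding training pieces, such as feeding real data, are omitted):

.. code-block:: python

    import paddle.fluid as fluid

    w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                    learning_rate=0.5,
                                    regularizer=fluid.regularizer.L2Decay(1.0),
                                    trainable=True)
    print(w_param_attrs.name)  # "fc_weight"

    x = fluid.layers.data(name='X', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)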