Unverified commit 20f0878f, authored by Zeng Jinle, committed by GitHub

Fix en docs of apis (#20050)

* fix en docs of apis, test=develop, test=document_fix

* follow chunwei's comments, test=develop

Parent 2b7aca25
@@ -48,8 +48,8 @@ paddle.fluid.ParallelExecutor ('paddle.fluid.parallel_executor.ParallelExecutor'
paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '77c739744ea5708b80fb1b37cc89db40'))
paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '0af092676e5b1320bb4232396154ce4b'))
paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '0627369b86ff974f433f7078d1e78349'))
paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '4829bd8c4a4f1b19438500def321cb65'))
paddle.fluid.DataFeedDesc ('paddle.fluid.data_feed_desc.DataFeedDesc', ('document', '43877a0d9357db94d3dbc7359cbe8c73'))
paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '9c6615854b61caa5f0d3e6ccc5e51338'))
@@ -133,7 +133,7 @@ paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_at
paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae'))
paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90'))
paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66'))
paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08'))
paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4'))
paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9'))
paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c'))
@@ -978,7 +978,7 @@ paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss',
paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', 'e132700f81e9c5d27a7b3cd32b38d714'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
@@ -1062,7 +1062,7 @@ paddle.fluid.CUDAPlace ('paddle.fluid.core_avx.CUDAPlace', ('document', '6a6cd8e
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace ('paddle.fluid.core_avx.CUDAPinnedPlace', ('document', 'afd58ea5d390b5ea06ca70291a266d45'))
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'a4d4d13ce9eeb86bbaa7ab935c207577'))
paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'b5ae1698ea72d5a9428000b916a67379'))
paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -1071,7 +1071,7 @@ paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'p
paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a'))
paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc'))
paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f'))
paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'a0b00ccc8584b4a1cf4ec5aa74780e77'))
paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209'))
paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7'))
......
@@ -342,13 +342,13 @@ def set_gradient_clip(clip, param_list=None, program=None):
    To specify parameters that require gradient clip.

    Args:
        clip (BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
            for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` ,
            which describes the type and detailed attributes of the required gradient clip.
        param_list (list(Variable), optional): Parameters that require gradient clip.
            It can be a list of parameters or a list of parameter names.
            Default None, meaning that all parameters in the program will be included.
        program (Program, optional): The program where the parameters are located.
            Default None, meaning that :ref:`api_fluid_default_main_program` is used.

    Returns:
......
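For reference, a minimal usage sketch of the updated :code:`set_gradient_clip` (a hedged illustration assuming the fluid 1.x layers and optimizer APIs listed in the spec above; it is not part of this commit):

.. code-block:: python

    import paddle.fluid as fluid

    # A tiny regression network whose parameter gradients will be clipped.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    prediction = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(
        fluid.layers.square_error_cost(input=prediction, label=y))

    # Clip all parameter gradients in the default main program by their
    # global norm before the optimizer applies them.
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))

    sgd = fluid.optimizer.SGD(learning_rate=0.01)
    sgd.minimize(loss)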
@@ -1680,76 +1680,53 @@ def dropout(x,
def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
    """
    This operator computes the cross entropy between input and label. It
    supports both hard-label and soft-label cross entropy computation.

    1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]`
       is the hard label of each sample.

        .. math::

           output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index

    2. Soft-label cross entropy: if soft_label=True, :math:`label[i_1, i_2, ..., i_k, j]`
       is the soft label of each sample corresponding to the j-th class.

        .. math::

           output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])

    Args:
        input (Variable): a multidimensional Tensor with shape
            :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
            the class number. The data type should be float32 or float64.
        label (Variable): label value corresponding to input. If
            soft_label=False, the dimension of label should be :math:`[N_1, N_2, ..., N_k]`
            or :math:`[N_1, N_2, ..., N_k, 1]` , its data type should be int64,
            and the value must be inside [0, D). If soft_label=True, the shape
            and data type of label should be the same as input, and the sum of
            the soft label values of each sample should be 1.
        soft_label (bool): indicates whether the label is soft. Default False,
            meaning that the label is hard. If soft_label=True, the label is soft.
        ignore_index (int): specifies an ignorable label value. The ignored label
            would be omitted when computing. If it is a negative integer, no label
            would be ignored. Only valid when soft_label=False. Default -100.

    Returns:
        A Variable holding a Tensor representing the cross entropy, whose data
        type is the same as input.
        If soft_label=False, the shape of output is the same as label.
        If soft_label=True, the shape of output is :math:`[N_1, N_2, ..., N_k, 1]` .

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            class_num = 7
            x = fluid.layers.data(name='x', shape=[3, 10], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
    """
    if not soft_label:
        return cross_entropy2(input, label, ignore_index)
......
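The hard-label case is covered by the docstring example above; here is a complementary sketch of the soft-label branch (an illustration assuming the fluid 1.x executor API, not part of this commit). Each label row is a distribution over the D classes that sums to 1, and the output has shape [N, 1]:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    D = 4  # class number, i.e., the last dimension of input
    x = fluid.layers.data(name='x', shape=[D], dtype='float32')
    soft_label = fluid.layers.data(name='soft_label', shape=[D], dtype='float32')
    predict = fluid.layers.fc(input=x, size=D, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=soft_label,
                                      soft_label=True)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    out, = exe.run(feed={
        'x': np.random.rand(3, D).astype('float32'),
        'soft_label': np.array([[0.1, 0.2, 0.3, 0.4]] * 3, dtype='float32'),
    }, fetch_list=[cost])
    print(out.shape)  # (3, 1): one loss value per sample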
@@ -23,51 +23,51 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def create_lod_tensor(data, recursive_seq_lens, place):
    """
    Create a LoDTensor from a numpy array, a list, or an existing LoDTensor.

    The implementation is as follows:

    1. Check whether the length-based LoD, i.e., :code:`recursive_seq_lens`,
       is valid.

    2. Convert :code:`recursive_seq_lens` to an offset-based LoD.

    3. Based on :code:`place`, copy the :code:`data` from a numpy array, list
       or existing LoDTensor to CPU or GPU device.

    4. Set the offset-based LoD to the output LoDTensor.

    Suppose we want to create a LoDTensor to hold data for word sequences,
    where each word is represented by an integer, and we want to represent
    two sentences, one of 2 words and one of 3 words. Then :code:`data` would
    be a numpy array of integers with shape (5, 1). :code:`recursive_seq_lens`
    would be [[2, 3]], indicating the word number in each sentence. This
    length-based :code:`recursive_seq_lens` [[2, 3]] would be converted to
    offset-based LoD [[0, 2, 5]] inside the function call.

    Please reference :ref:`user_guide_lod_tensor` for more details regarding LoD.

    Args:
        data (numpy.ndarray|list|LoDTensor): a numpy array, a list or a LoDTensor
            holding the data to be copied.
        recursive_seq_lens (list[list[int]]): a list of lists indicating the
            length-based LoD info.
        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where the data
            in the created LoDTensor will be stored.

    Returns:
        A LoDTensor with tensor data and recursive_seq_lens info.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
    """
    if isinstance(data, core.LoDTensor):
        return create_lod_tensor(np.array(data), recursive_seq_lens, place)
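A short sketch of the length-to-offset conversion described above (hedged: it assumes the LoDTensor methods shape(), recursive_sequence_lengths() and lod() available in fluid 1.x; not part of this commit):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    # Two word sequences of lengths 2 and 3; each word is one int64 id.
    data = np.arange(5).reshape(5, 1).astype('int64')
    t = fluid.create_lod_tensor(data, [[2, 3]], fluid.CPUPlace())

    print(t.shape())                       # [5, 1]
    print(t.recursive_sequence_lengths())  # [[2, 3]]
    print(t.lod())                         # [[0, 2, 5]], the offset-based LoD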
@@ -116,47 +116,47 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
    """
    Create a LoDTensor containing random integers.

    The implementation is as follows:

    1. Obtain the shape of the output LoDTensor based on :code:`recursive_seq_lens`
       and :code:`base_shape`. The first dimension of the shape is the total
       length of the sequences, while the other dimensions are the same as
       :code:`base_shape`.

    2. Create a numpy array of random integers, and pass the created numpy
       array as parameter :code:`data` of :ref:`api_fluid_create_lod_tensor` to
       create the output LoDTensor.

    Suppose we want to create a LoDTensor to hold data for 2 sequences, where
    the dimensions of the sequences are [2, 30] and [3, 30] respectively.
    The :code:`recursive_seq_lens` would be [[2, 3]], and :code:`base_shape`
    would be [30] (the other dimensions excluding the sequence length).
    Therefore, the shape of the output LoDTensor would be [5, 30], where
    the first dimension 5 is the total length of the sequences, and the
    other dimensions are :code:`base_shape`.

    Args:
        recursive_seq_lens (list[list[int]]): a list of lists indicating the
            length-based LoD info.
        base_shape (list[int]): the shape of the output LoDTensor excluding
            the first dimension.
        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where
            the data in the created LoDTensor will be stored.
        low (int): the lower bound of the random integers.
        high (int): the upper bound of the random integers.

    Returns:
        A LoDTensor with tensor data and recursive_seq_lens info, whose data
        is inside [low, high].

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
                    base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
            print(t.shape()) # [5, 30]
    """
    assert isinstance(base_shape, list), "base_shape should be a list"
    # append the total number of basic elements to the front of its shape
......
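To make the shape derivation above concrete, a hedged sketch (it assumes np.array() can read back a LoDTensor in fluid 1.x; not part of this commit):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    # 2 sequences of lengths 2 and 3; each step is a [30]-dim element, so
    # the created tensor has shape [5, 30] with values inside [0, 10].
    t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
                                          base_shape=[30],
                                          place=fluid.CPUPlace(),
                                          low=0, high=10)
    data = np.array(t)
    print(data.shape)                        # (5, 30)
    print(data.min() >= 0, data.max() <= 10) # True True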
@@ -1782,26 +1782,29 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
    """
    **Notes: This API does not support sparse parameter optimization.**

    Adadelta Optimizer. Please refer to this for details:
    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

    The update is done as follows:

    .. math::

        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2

        learning\\_rate &= \\sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }

        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2

    Args:
        learning_rate (float|Variable): global learning rate.
        epsilon (float): a small float number for numeric stability. Default 1.0e-6.
        rho (float): a floating point value indicating the decay rate. Default 0.95.
        regularization (WeightDecayRegularizer, optional): A Regularizer, such as
            fluid.regularizer.L2DecayRegularizer. Default None, meaning that there is
            no regularization.
        name (str, optional): An optional name prefix for debugging. Default None.

    Examples:
        .. code-block:: python
......
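A minimal end-to-end sketch of AdadeltaOptimizer with the defaults documented above (a hedged illustration assuming the fluid 1.x layers and executor APIs; not part of this commit):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    prediction = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(
        fluid.layers.square_error_cost(input=prediction, label=y))

    # epsilon and rho keep their documented defaults (1.0e-6 and 0.95).
    optimizer = fluid.optimizer.AdadeltaOptimizer(learning_rate=0.0015)
    optimizer.minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    loss_val, = exe.run(
        feed={'x': np.random.rand(4, 10).astype('float32'),
              'y': np.random.rand(4, 1).astype('float32')},
        fetch_list=[loss])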
@@ -27,23 +27,26 @@ __all__ = [
class ParamAttr(object):
    """
    Create an object to represent the attribute of a parameter. The attributes are:
    name, initializer, learning rate, regularizer, trainable, gradient clip,
    and model average.

    Parameters:
        name (str, optional): The parameter's name. Default None, meaning that the name
            would be created automatically.
        initializer (Initializer, optional): The method to initialize this parameter. Default
            None, meaning that the weight parameter is initialized by Xavier initializer,
            and the bias parameter is initialized by 0.
        learning_rate (float): The parameter's learning rate. The learning rate used
            during optimization is the global learning rate times the parameter's
            learning rate times the factor of the learning rate scheduler. Default 1.0.
        regularizer (WeightDecayRegularizer, optional): Regularization factor. Default None,
            meaning that there is no regularization.
        trainable (bool): Whether this parameter is trainable. Default True.
        gradient_clip (BaseGradientClipAttr, optional): The method to clip this parameter's
            gradient. Default None, meaning that there is no gradient clip.
        do_model_average (bool): Whether this parameter should do model average
            when model average is enabled. Default False.

    Examples:
        .. code-block:: python
@@ -54,6 +57,7 @@ class ParamAttr(object):
                learning_rate=0.5,
                regularizer=fluid.regularizer.L2Decay(1.0),
                trainable=True)
            print(w_param_attrs.name) # "fc_weight"
            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """
......