From 20f0878f7085678fefdd86df181222ca52e1a1e8 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Fri, 27 Sep 2019 20:50:59 +0800
Subject: [PATCH] Fix en docs of apis (#20050)

* fix en docs of apis, test=develop, test=document_fix

* follow chunwei's comments, test=develop
---
 paddle/fluid/API.spec             |  12 +--
 python/paddle/fluid/clip.py       |   6 +-
 python/paddle/fluid/layers/nn.py  |  85 ++++++++-------------
 python/paddle/fluid/lod_tensor.py | 118 +++++++++++++++---------------
 python/paddle/fluid/optimizer.py  |  25 ++++---
 python/paddle/fluid/param_attr.py |  38 +++++-----
 6 files changed, 134 insertions(+), 150 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 74c3920674b..0a0c3663f3c 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -48,8 +48,8 @@ paddle.fluid.ParallelExecutor ('paddle.fluid.parallel_executor.ParallelExecutor'
 paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '77c739744ea5708b80fb1b37cc89db40'))
 paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '0af092676e5b1320bb4232396154ce4b'))
-paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
-paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
+paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '0627369b86ff974f433f7078d1e78349'))
+paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '4829bd8c4a4f1b19438500def321cb65'))
 paddle.fluid.DataFeedDesc ('paddle.fluid.data_feed_desc.DataFeedDesc', ('document', '43877a0d9357db94d3dbc7359cbe8c73'))
 paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '9c6615854b61caa5f0d3e6ccc5e51338'))
@@ -133,7 +133,7 @@ paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_at
 paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae'))
 paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90'))
 paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66'))
-paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', 'f43c659ca1749a3f0ff2231e6dfda07d'))
+paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08'))
 paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4'))
 paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9'))
 paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c'))
@@ -978,7 +978,7 @@ paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss',
 paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
-paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', '3f1c5385519a3674c18c3a1ab34ac04f'))
+paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', 'e132700f81e9c5d27a7b3cd32b38d714'))
 paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
 paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
@@ -1062,7 +1062,7 @@ paddle.fluid.CUDAPlace ('paddle.fluid.core_avx.CUDAPlace', ('document', '6a6cd8e
 paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPlace, arg0: int) -> None
 paddle.fluid.CUDAPinnedPlace ('paddle.fluid.core_avx.CUDAPinnedPlace', ('document', 'afd58ea5d390b5ea06ca70291a266d45'))
 paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None
-paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'cd667b4ee96d7d6fca40aa722d67d744'))
+paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'a4d4d13ce9eeb86bbaa7ab935c207577'))
 paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'b5ae1698ea72d5a9428000b916a67379'))
 paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -1071,7 +1071,7 @@ paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'p
 paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a'))
 paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc'))
 paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f'))
-paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '77ca02bb37b70d226510df9cf5e45965'))
+paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'a0b00ccc8584b4a1cf4ec5aa74780e77'))
 paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209'))
 paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7'))
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index aeef8505f8e..2188c069b8b 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -342,13 +342,13 @@ def set_gradient_clip(clip, param_list=None, program=None):
     To specify parameters that require gradient clip.
 
     Args:
-        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+        clip (BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
            for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` , which describes
            the type and detailed attributes of the required gradient clip.
-        param_list(list(Variable), optional): Parameters that require gradient clip.
+        param_list (list(Variable), optional): Parameters that require gradient clip.
            It can be a list of parameters or a list of parameter names.
            Default None, meaning that all parameters in the program will be included.
-        program(Program, optional): The program where parameters are located.
+        program (Program, optional): The program where parameters are located.
            Default None, meaning that :ref:`api_fluid_default_main_program` is used.
 
     Returns:
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8f6c8a5d127..0632ad9ad2c 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1680,76 +1680,53 @@ def dropout(x,
 def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
     """
-    **Cross Entropy Layer**
+    This operator computes the cross entropy between input and label. It
+    supports both hard-label and soft-label cross entropy computation.
 
-    This layer computes the cross entropy between `input` and `label`. It
-    supports both standard cross-entropy and soft-label cross-entropy loss
-    computation.
-
-    1) One-hot cross-entropy:
-        `soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
+    1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]`
+       is the hard label of each sample.
 
        .. math::
 
-            Y[i] = -\log(X[i, Label[i]])
+           output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index
 
-    2) Soft-label cross-entropy:
-        `soft_label = True`, `Label[i, j]` indicates the soft label of class j
-        for sample i:
+    2. Soft-label cross entropy: if soft_label=True, :math:`label[i_1, i_2, ..., i_k, j]`
+       is the soft label of each sample corresponding to the j-th class.
 
        .. math::
 
-            Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
-
-       Please make sure that in this case the summation of each row of `label`
-       equals one.
-
-    3) One-hot cross-entropy with vecterized `label`:
-        As a special case of 2), when each row of 'label' has only one
-        non-zero element which is equal to 1, soft-label cross-entropy degenerates
-        to a one-hot cross-entropy with one-hot label representation.
+           output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])
 
     Args:
-        input (Variable|list): a 2-D tensor with shape [N x D], where N is the
-                               batch size and D is the number of classes. This
-                               input is a probability computed by the previous
-                               operator, which is almost always the result of
-                               a softmax operator.
-        label (Variable|list): the ground truth which is a 2-D tensor. When
-                               `soft_label` is set to `False`, `label` is a
-                               tensor with shape [N x 1]. When
-                               `soft_label` is set to `True`, `label` is a
-                               tensor with shape [N x D].
-        soft_label (bool): a flag indicating whether to
-                           interpretate the given labels as soft
-                           labels. Default: `False`.
-        ignore_index (int): Specifies a target value that is ignored and does
-                            not contribute to the input gradient. Only valid
-                            if soft_label is set to False. Default: kIgnoreIndex
+        input (Variable): a multidimensional Tensor with shape
+            :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
+            the number of classes. The data type should be float32 or float64.
+        label (Variable): label value corresponding to input. If
+            soft_label=False, the shape of label should be :math:`[N_1, N_2, ..., N_k]`
+            or :math:`[N_1, N_2, ..., N_k, 1]` , its data type should be int64,
+            and the values must be in the range [0, D). If soft_label=True, the shape
+            and data type of label should be the same as those of input, and the sum
+            of the soft label values of each sample should be 1.
+        soft_label (bool): indicates whether the label is soft. Default False, meaning
+            that the label is hard. If soft_label=True, the label is soft.
+        ignore_index (int): specifies a label value to be ignored. The ignored label is
+            omitted when computing the loss. If it is a negative integer, no label is
+            ignored. Only valid when soft_label=False. Default -100.
 
     Returns:
-        A 2-D tensor with shape [N x 1], the cross entropy loss.
-
-    Raises:
-        ValueError:
-
-                      1. the 1st dimension of ``input`` and ``label`` are not equal.
-
-                      2. when ``soft_label == True``, and the 2nd dimension of
-                         ``input`` and ``label`` are not equal.
-
-                      3. when ``soft_label == False``, and the 2nd dimension of
-                         ``label`` is not 1.
+        A Variable holding a Tensor representing the cross entropy, whose data type is the same as that of input.
+        If soft_label=False, the shape of the output is the same as that of label.
+        If soft_label=True, the shape of the output is :math:`[N_1, N_2, ..., N_k, 1]` .
 
     Examples:
        .. code-block:: python
 
-            import paddle.fluid as fluid
-            classdim = 7
-            x = fluid.layers.data(name='x', shape=[3, 7], dtype='float32', append_batch_size=False)
-            label = fluid.layers.data(name='label', shape=[3, 1], dtype='float32', append_batch_size=False)
-            predict = fluid.layers.fc(input=x, size=classdim, act='softmax')
-            cost = fluid.layers.cross_entropy(input=predict, label=label)
+            import paddle.fluid as fluid
+            class_num = 7
+            x = fluid.layers.data(name='x', shape=[3, 10], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
+            cost = fluid.layers.cross_entropy(input=predict, label=label)
     """
    if not soft_label:
        return cross_entropy2(input, label, ignore_index)
diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py
index 9d5ed2e6d99..941ace24b36 100644
--- a/python/paddle/fluid/lod_tensor.py
+++ b/python/paddle/fluid/lod_tensor.py
@@ -23,51 +23,51 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
 
 def create_lod_tensor(data, recursive_seq_lens, place):
     """
-    Create a lod tensor from a numpy array, a list, or an existing lod tensor.
+    Create a LoDTensor from a numpy array, list or existing LoDTensor.
 
-    Create a lod tensor by doing the following:
+    The implementation is as follows:
 
-    1. Check that the length-based level of detail (LoD) also known as
-       recursive_sequence_lengths of the input is valid.
+    1. Check whether the length-based LoD, i.e., :code:`recursive_seq_lens`
+       is valid.
 
-    2. Convert recursive_sequence_lengths to a offset-based LoD.
+    2. Convert :code:`recursive_seq_lens` to an offset-based LoD.
 
-    3. Copy the data from a numpy array, a list or a existing lod tensor to
-       CPU or GPU device (based on input place).
+    3. Based on :code:`place` , copy the :code:`data` from a numpy array, list
+       or existing LoDTensor to the CPU or GPU device.
 
-    4. Set the level of detail (LoD) using the offset-based LoD.
+    4. Set the offset-based LoD on the output LoDTensor.
 
-    Examples:
+    Suppose we want to create a LoDTensor to hold data for word sequences,
+    where each word is represented by an integer, and we want to represent
+    two sentences, one of 2 words and one of 3 words.
 
-    Suppose we want LoDTensor to hold data for sequences of word, where each
-    word is represented by an integer. If we want to create a LoDTensor to
-    represent two sentences, one of 2 words, and one of 3 words.
+    Then :code:`data` would be a numpy array of integers with shape (5, 1).
+    :code:`recursive_seq_lens` would be [[2, 3]], indicating the number of
+    words in each sentence. This length-based :code:`recursive_seq_lens` [[2, 3]]
+    would be converted to the offset-based LoD [[0, 2, 5]] inside the function
+    call.
 
-    Then :code:`data` can be a numpy array of integers with shape (5, 1).
-    :code:`recursive_seq_lens` will be [[2, 3]], indicating the length(# of words) in each
-    sentence. This length-based :code:`recursive_seq_lens` [[2, 3]] will be converted to
-    offset-based LoD [[0, 2, 5]] inside the function call.
+    Please refer to :ref:`user_guide_lod_tensor` for more details regarding LoD.
 
-        .. code-block:: python
+    Args:
+        data (numpy.ndarray|list|LoDTensor): a numpy array, a list or an existing
+            LoDTensor holding the data to be copied.
+        recursive_seq_lens (list[list[int]]): a list of lists indicating the
+            length-based LoD info.
+        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where the data
+            in the created LoDTensor will be stored.
 
-        import paddle.fluid as fluid
-        import numpy as np
+    Returns:
+        A LoDTensor with tensor data and recursive_seq_lens info.
 
-        t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
+    Examples:
 
-    Please reference :ref:`api_guide_low_level_lod_tensor` for more details
-    regarding LoD.
+        .. code-block:: python
 
-    Args:
-        data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
-            list holding the data to be copied.
-        recursive_seq_lens(list): a list of lists indicating the length-based level of detail
-            info specified by the user.
-        place(Place): CPU or GPU place indicating where the data in the new
-            LoDTensor will be stored.
+            import paddle.fluid as fluid
+            import numpy as np
 
-    Returns:
-        A fluid LoDTensor object with tensor data and recursive_seq_lens info.
+            t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
     """
    if isinstance(data, core.LoDTensor):
        return create_lod_tensor(np.array(data), recursive_seq_lens, place)
@@ -116,47 +116,47 @@ def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low,
     """
     Create a LoDTensor containing random integers.
 
-    This function is frequently used in the book examples. So we revised it
-    based on the new create_lod_tensor API and put it here in the lod_tensor
-    module to simplify the code.
-
-    The function does the following:
-
-    1. Calculate the overall shape of the LoDTensor based on the length-based
-       :code:`recursive_seq_lens` input and the shape of the basic element in
-       :code:`base_shape`.
+    The implementation is as follows:
 
-    2. Create a numpy array of this shape.
+    1. Obtain the shape of the output LoDTensor based on :code:`recursive_seq_lens`
+       and :code:`base_shape` . The first dimension of the shape is the total
+       length of the sequences, while the other dimensions are the same as
+       :code:`base_shape` .
 
-    3. Create the LoDTensor using create_lod_tensor API.
+    2. Create a numpy array of random integers, and pass the created numpy
+       array as the parameter :code:`data` of :ref:`api_fluid_create_lod_tensor`
+       to create the output LoDTensor.
 
-    Suppose we want LoDTensor to hold data for sequences of word, where each
-    word is represented by an integer. If we want to create a LoDTensor to
-    represent two sentences, one of 2 words, and one of 3 words. Then
-    'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]].
-    Then the overall shape of the LoDTensor would be [5, 1], holding 5 words
-    for two sentences.
+    Suppose we want to create a LoDTensor to hold data for 2 sequences, where
+    the dimensions of the sequences are [2, 30] and [3, 30] respectively.
+    The :code:`recursive_seq_lens` would be [[2, 3]], and :code:`base_shape`
+    would be [30] (the other dimensions excluding the sequence length).
+    Therefore, the shape of the output LoDTensor would be [5, 30], where
+    the first dimension 5 is the total length of the sequences, and the
+    other dimensions are :code:`base_shape`.
 
     Args:
-        recursive_seq_lens(list): a list of lists indicating the length-based
-            level of detail info specified by the user.
-        base_shape(list): the shape of the basic element to be held by the
-            LoDTensor.
-        place(Place): CPU or GPU place indicating where the data in the new
-            LoDTensor will be stored.
-        low(int): the lower bound of the random integers.
-        high(int): the upper bound of the random integers.
+        recursive_seq_lens (list[list[int]]): a list of lists indicating the
+            length-based LoD info.
+        base_shape (list[int]): the shape of the output LoDTensor excluding
+            the first dimension.
+        place (CPUPlace|CUDAPlace): CPU or GPU place indicating where
+            the data in the created LoDTensor will be stored.
+        low (int): the lower bound of the random integers.
+        high (int): the upper bound of the random integers.
 
     Returns:
-        A fluid LoDTensor object with tensor data and recursive_seq_lens info.
+        A LoDTensor with tensor data and recursive_seq_lens info, whose data
+        lies within [low, high].
 
     Examples:
        .. code-block:: python
 
            import paddle.fluid as fluid
 
-            t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
-                    base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
+            t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],
+                    base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
+            print(t.shape()) # [5, 30]
     """
    assert isinstance(base_shape, list), "base_shape should be a list"
    # append the total number of basic elements to the front of its shape
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 3feff3a1d97..3758eb5d43d 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1782,26 +1782,29 @@ class DecayedAdagradOptimizer(Optimizer):
 class AdadeltaOptimizer(Optimizer):
     """
-    **NOTES: This API does not support sparse parameter optimization.**
+    **Notes: This API does not support sparse parameter optimization.**
 
     Adadelta Optimizer. Please refer to this for details:
-    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
-    <https://arxiv.org/abs/1212.5701>`_.
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.
+
+    The update is done as follows:
 
     .. math::
 
-        E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2\\
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2
 
-        learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) }\\
+        learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }
 
-        E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g*learning\_rate)^2
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\_rate)^2
 
     Args:
-        learning_rate(float|Variable): global learning rate.
-        epsilon(float): a small float number for numeric stability. Default 1.0e-6.
-        rho(float): a floating point value indicating the decay rate.
-        regularization(WeightDecayRegularizer, optional): A Regularizer, such as fluid.regularizer.L2DecayRegularizer. Default None, meaning that there is no regularization.
-        name(str, optional): A optional name prefix for debugging. Default None.
+        learning_rate (float|Variable): global learning rate.
+        epsilon (float): a small float number for numeric stability. Default 1.0e-6.
+        rho (float): a floating point value indicating the decay rate. Default 0.95.
+        regularization (WeightDecayRegularizer, optional): A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer. Default None, meaning that there is no
+            regularization.
+        name (str, optional): An optional name prefix for debugging. Default None.
 
     Examples:
        .. code-block:: python
diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py
index 028aada68cd..ebf0f2e0cbc 100644
--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -27,23 +27,26 @@ __all__ = [
 
 class ParamAttr(object):
     """
-    Parameter attributes object. To fine-tuning network training process, user
-    can set parameter's attributes to control training details. Such as learning rate,
-    regularization, trainable, do_model_average and the method to initialize param.
-
-
-    Args:
-        name(str): The parameter's name. Default None.
-        initializer(Initializer): The method to initial this parameter. Default None.
-        learning_rate(float): The parameter's learning rate. The learning rate when
-            optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
-            Default 1.0.
-        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
-        trainable(bool): Whether this parameter is trainable. Default True.
-        gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
-            gradient. Default None.
-        do_model_average(bool): Whether this parameter should do model average
-            when model average is enabled. Default True.
+    Create an object to represent the attributes of a parameter. The attributes are:
+    name, initializer, learning rate, regularizer, trainable, gradient clip,
+    and model average.
+
+    Parameters:
+        name (str, optional): The parameter's name. Default None, meaning that the name
+            would be created automatically.
+        initializer (Initializer, optional): The method to initialize this parameter. Default
+            None, meaning that the weight parameter is initialized by the Xavier initializer,
+            and the bias parameter is initialized to 0.
+        learning_rate (float): The parameter's learning rate. The effective learning rate
+            during optimization is the global learning rate times the parameter's learning
+            rate times the factor of the learning rate scheduler. Default 1.0.
+        regularizer (WeightDecayRegularizer, optional): Regularization factor. Default None,
+            meaning there is no regularization.
+        trainable (bool): Whether this parameter is trainable. Default True.
+        gradient_clip (BaseGradientClipAttr, optional): The method to clip this parameter's
+            gradient. Default None, meaning that there is no gradient clip.
+        do_model_average (bool): Whether this parameter should do model average
+            when model average is enabled. Default True.
 
     Examples:
        .. code-block:: python
@@ -54,6 +57,7 @@ class ParamAttr(object):
                                           learning_rate=0.5,
                                           regularizer=fluid.regularizer.L2Decay(1.0),
                                           trainable=True)
+            print(w_param_attrs.name) # "fc_weight"
             x = fluid.layers.data(name='X', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
     """
-- 
GitLab
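
Supplementary sketches for the APIs documented above follow. First, the set_gradient_clip hunk shows only the revised Args section, so here is a minimal usage sketch of the documented signature set_gradient_clip(clip, param_list=None, program=None). It assumes that fluid.clip.GradientClipByGlobalNorm, the class the docstring references, accepts a clip_norm threshold:

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=y_pred, label=y))

    # With param_list=None and program=None, clipping applies to all
    # parameters of the default main program (assumed per the Args above).
    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))

    # Called before minimize, so the clip ops are inserted when the
    # backward pass appends gradients.
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)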
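The new cross_entropy docstring stops at graph construction. A runnable end-to-end sketch of the hard-label case, assuming the fluid 1.x Executor API; the batch size of 4 and the random feed values are illustrative only:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    class_num = 7
    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')  # hard labels in [0, class_num)
    predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    loss, = exe.run(feed={'x': np.random.random((4, 10)).astype('float32'),
                          'label': np.random.randint(0, class_num, (4, 1)).astype('int64')},
                    fetch_list=[cost])
    # Per the Returns section above, the output has the same shape as label
    # in hard-label mode: one cross entropy value per sample.
    print(loss.shape)  # (4, 1)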
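Both lod_tensor docstrings above use the same [[2, 3]] running example. A round-trip sketch combining the two helpers under the signatures documented in this patch; the data values are arbitrary, and the recursive_sequence_lengths() accessor is assumed from the fluid 1.x LoDTensor API:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    # Two sentences of 2 and 3 words; each word is an integer id of shape [1].
    data = np.random.randint(0, 10, size=(5, 1)).astype('int64')
    t = fluid.create_lod_tensor(data, recursive_seq_lens=[[2, 3]], place=fluid.CPUPlace())
    print(t.recursive_sequence_lengths())  # [[2, 3]]
    print(np.array(t).shape)               # (5, 1)

    # The random-integer helper derives the first dimension (2 + 3 = 5)
    # from recursive_seq_lens and appends base_shape to it.
    t2 = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]], base_shape=[30],
                                           place=fluid.CPUPlace(), low=0, high=10)
    print(t2.shape())  # [5, 30]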
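The AdadeltaOptimizer hunk ends right at its Examples header, so the example body is not visible in this diff. A minimal sketch consistent with the constructor ArgSpec recorded in API.spec above; the learning rate 0.0003 is an arbitrary choice:

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=y_pred, label=y))

    # epsilon and rho are shown at their documented defaults (1.0e-6 and 0.95).
    optimizer = fluid.optimizer.AdadeltaOptimizer(
        learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
    optimizer.minimize(loss)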