diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 80e8ff484a4c04df1b41bbca284d7c604962934c..f4013b61d06e8e55f16700078a4611aade20c751 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -748,16 +748,25 @@ def max_sequence_len(rank_table): def lod_tensor_to_array(x, table): - """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. + """ + Convert a LoDTensor to a LoDTensorArray. + + This function splits a LoDTensor into a LoDTensorArray according to its LoD + information. LoDTensorArray is an alias of C++ std::vector in + Paddle. The generated LoDTensorArray of this function can be further read + or written by 'read_from_array()' and 'write_to_array()' operators. However, + this function is generally an internal component of Paddle 'DynamicRNN'. + Users should not use it directly. Args: - x (Variable|list): The LOD tensor to be converted to a LOD tensor array. + x (Variable|list): The LoDTensor to be converted to a LoDTensorArray. table (ParamAttr|list): The variable that stores the level of lod which is ordered by sequence length in - descending order. + descending order. It is generally generated + by 'layers.lod_rank_table()' API. Returns: - Variable: The variable of type array that has been converted from a + Variable: The LoDTensorArray that has been converted from the input tensor. Examples: @@ -1047,6 +1056,13 @@ def array_length(array): class ConditionalBlockGuard(BlockGuard): + """ + ConditionalBlockGuard is derived from BlockGuard. It is dedicated to + holding a ConditionalBlock, and helping users enter and exit the + ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard + is generally an internal component of IfElse, users should not use it directly. 
+ """ + def __init__(self, block): if not isinstance(block, ConditionalBlock): raise TypeError("block should be conditional block") @@ -1563,17 +1579,15 @@ def reorder_lod_tensor_by_rank(x, rank_table): def is_empty(x, cond=None, **ignored): """ - **Is Empty** - - This layer returns the truth value of whether the variable is empty. + Test whether a Variable is empty. Args: - x(Variable): Operand of *is_empty* - cond(Variable|None): Optional output variable to store the result - of *is_empty* + x (Variable): The Variable to be tested. + cond (Variable|None): Output parameter. Returns the test result + of given 'x'. Returns: - Variable: The tensor variable storing the output of *is_empty*. + Variable: The tensor variable storing the test result of 'x'. Raises: TypeError: If input cond is not a variable, or cond's dtype is diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 716cc7824eff0c56cc55a055310fa8b1913ac5e6..9cbb559093d384f70ef880de513ddcb21d080413 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -70,21 +70,40 @@ def noam_decay(d_model, warmup_steps): def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies exponential decay to the learning rate. + """ + Applies exponential decay to the learning rate. + + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, the learning rate will be decayed by + 'decay_rate' every 'decay_steps' steps. 
+ + >>> if staircase == True: + >>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps) + >>> else: + >>> decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) - ```python - decayed_learning_rate = learning_rate * - decay_rate ^ (global_step / decay_steps) - ``` Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.exponential_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) + """ global_step = _decay_step_counter() @@ -128,22 +147,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies inverse time decay to the initial learning rate. + """ + Applies inverse time decay to the initial learning rate. - >>> if staircase: + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, an inverse decay function will be + applied to the initial learning rate. 
+ + >>> if staircase == True: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) >>> else: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training. - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.inverse_time_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) """ global_step = _decay_step_counter() diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c8cbb5ef00b7dac4ae3f833d3d98653e17bee2ab..047c3aa2b7acd0e19b05dfae13c28d2f16007e10 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -102,14 +102,15 @@ def fc(input, """ **Fully Connected Layer** - The fully connected layer can take multiple tensors as its inputs. It - creates a variable called weights for each input tensor, which represents - a fully connected weight matrix from each input unit to each output unit. - The fully connected layer multiplies each input tensor with its coresponding - weight to produce an output Tensor. If multiple input tensors are given, - the results of multiple multiplications will be sumed up. If bias_attr is - not None, a bias variable will be created and added to the output. 
Finally, - if activation is not None, it will be applied to the output as well. + This function creates a fully connected layer in the network. It can take + multiple tensors as its inputs. It creates a variable called weights for + each input tensor, which represents a fully connected weight matrix from + each input unit to each output unit. The fully connected layer multiplies + each input tensor with its corresponding weight to produce an output Tensor. + If multiple input tensors are given, the results of multiple multiplications + will be summed up. If bias_attr is not None, a bias variable will be created + and added to the output. Finally, if activation is not None, it will be applied + to the output as well. This process can be formulated as follows: @@ -878,7 +879,7 @@ def cos_sim(X, Y): Args: X (Variable): The input X. Y (Variable): The input Y. - + Returns: Variable: the output of cosine(X, Y). """ @@ -1083,7 +1084,7 @@ def chunk_eval(input, chunk_scheme (str): ${chunk_scheme_comment} num_chunk_types (int): ${num_chunk_types_comment} excluded_chunk_types (list): ${excluded_chunk_types_comment} - + Returns: tuple: tuple containing: (precision, recall, f1_score, num_infer_chunks, num_label_chunks, @@ -1143,7 +1144,7 @@ def sequence_conv(input, bias_attr (ParamAttr|None): attributes for bias param_attr (ParamAttr|None): attributes for parameter act (str): the activation type - + Returns: Variable: output of sequence_conv """ @@ -1509,6 +1510,7 @@ def sequence_last_step(input): return sequence_pool(input=input, pool_type="last") +@templatedoc() def pool2d(input, pool_size=-1, pool_type="max", @@ -1520,12 +1522,12 @@ def pool2d(input, use_mkldnn=False, name=None): """ - This function adds the operator for pooling in 2 dimensions, using the - pooling configurations mentioned in input parameters. + ${comment} Args: input (Variable): ${input_comment} - pool_size (int): ${ksize_comment} + pool_size (int): The side length of pooling windows. 
All pooling + windows are squares with pool_size on a side. pool_type (str): ${pooling_type_comment} pool_stride (int): stride of the pooling layer. pool_padding (int): padding size. @@ -1533,11 +1535,29 @@ def pool2d(input, use_cudnn (bool): ${use_cudnn_comment} ceil_mode (bool): ${ceil_mode_comment} use_mkldnn (bool): ${use_mkldnn_comment} - name (str): A name for this layer(optional). If set None, the layer - will be named automatically. - + name (str|None): A name for this layer(optional). If set None, the + layer will be named automatically. + Returns: Variable: output of pool2d layer. + + Raises: + ValueError: If 'pool_type' is not "max" nor "avg" + ValueError: If 'global_pooling' is False and 'pool_size' is -1 + ValueError: If 'use_cudnn' is not a bool value. + + Examples: + + .. code-block:: python + + data = fluid.layers.data( + name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.pool2d( + input=data, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -1800,7 +1820,7 @@ def beam_search_decode(ids, scores, name=None): ids (Variable): ${ids_comment} scores (Variable): ${scores_comment} name (str): The name of this layer. It is optional. - + Returns: tuple: a tuple of two output variable: sentence_ids, sentence_scores """ @@ -2063,7 +2083,7 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): beam_size (int): ${beam_size_comment} end_id (int): ${end_id_comment} level (int): ${level_comment} - + Returns: tuple: a tuple of beam_search output variables: selected_ids, selected_scores ''' @@ -2719,7 +2739,7 @@ def topk(input, k, name=None): This operator is used to find values and indices of the k largest entries for the last dimension. - If the input is a vector (rank=1), finds the k largest entries in the vector + If the input is a vector (1-D Tensor), finds the k largest entries in the vector and outputs their values and indices as vectors. 
Thus values[j] is the j-th largest entry in input, and its index is indices[j]. @@ -2729,9 +2749,11 @@ def topk(input, k, name=None): Args: input(Variable): The input variable which can be a vector or Tensor with higher rank. - k(int): An integer value to specify the top k largest elements. + k(int): The number of top elements to look for along the last dimension + of input. name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + will be named automatically. + Default: None Returns: values(Variable): The k largest elements along each last dimensional @@ -2739,13 +2761,16 @@ def topk(input, k, name=None): indices(Variable): The indices of values within the last dimension of input. + Raises: + ValueError: If k < 1 or k is not less than the last dimension of input + Examples: .. code-block:: python top5_values, top5_indices = layers.topk(input, k=5) """ shape = input.shape - if k < 1 and k >= shape[-1]: + if k < 1 or k >= shape[-1]: raise ValueError("k must be greater than 0 and less than %d." % (shape[-1])) @@ -3045,7 +3070,7 @@ def nce(input, param_attr (ParamAttr|None): attributes for parameter bias_attr (ParamAttr|None): attributes for bias num_neg_samples (int): ${num_neg_samples_comment} - + Returns: Variable: output of nce layer. """ diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 62b01d595a812ee8fc094e40b6dfb5c3f56cd012..e03c8ca91462961976ede253908e319957ba874e 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -79,20 +79,33 @@ def create_global_var(shape, force_cpu=False, name=None): """ - Create a global variable. such as global_step + Create a new variable in the global block(block 0). 
+ Args: shape(list[int]): shape of the variable - value(float): the value of the variable - dtype(string): element type of the parameter - persistable(bool): if this variable is persistable - force_cpu(bool): force this variable to be on CPU + value(float): the value of the variable. The newly created + variable will be filled with it. + dtype(string): data type of the variable + persistable(bool): if this variable is persistable. + Default: False + force_cpu(bool): force this variable to be on CPU. + Default: False + name(str|None): The name of the variable. If set to None the variable + name will be generated automatically. + Default: None Returns: Variable: the created Variable + + Examples: + .. code-block:: python + + var = fluid.layers.create_global_var(shape=[2,3], value=1.0, dtype='float32', + persistable=True, force_cpu=True, name='new_var') """ helper = LayerHelper("global_var", **locals()) var = helper.create_global_variable( - dtype=dtype, shape=shape, persistable=persistable, name=name) + dtype=dtype, shape=shape, persistable=persistable) helper.set_variable_initializer( var, initializer=Constant( value=float(value), force_cpu=force_cpu)) @@ -152,10 +165,11 @@ def sums(input, out=None): Args: input (Variable|list): The input tensor that has the elements that need to be summed up. + out (Variable|None): Output parameter. Returns the sum result. + Default: None Returns: - Variable: The tensor type variable that has the sum of input - written to it. + Variable: the sum of input. The same as the argument 'out' Examples: .. code-block::python @@ -328,13 +342,13 @@ def argmin(x, axis=0): x(Variable): The input to compute the indices of the min elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. 
code-block:: python - + out = fluid.layers.argmin(x=in, axis=0) out = fluid.layers.argmin(x=in, axis=-1) """ @@ -359,13 +373,13 @@ def argmax(x, axis=0): x(Variable): The input to compute the indices of the max elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. code-block:: python - + out = fluid.layers.argmax(x=in, axis=0) out = fluid.layers.argmax(x=in, axis=-1) """