Merge pull request #11457 from JiayiFeng/dev_add_doc

Add API reference

Merge pull request #11457 from JiayiFeng/dev_add_doc
Add API reference
1171c2c5 · Yu Yang · GitHub · ea03a228 · f6daab43 · 1171c2c5
7 changed files
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -443,7 +443,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
    AddComment(R"DOC(
 Swish Activation Operator.

-$$out = \frac{x}{1 + e^{- \beta x}}$$
+$$out = \\frac{x}{1 + e^{- \beta x}}$$

 )DOC");
  }

--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() {
  // TODO(dzhwinter): need to registered layout transform function

  AddComment(R"DOC(
-Pool2d Operator.
-
 The pooling2d operation calculates the output based on
 the input, pooling_type and ksize, strides, paddings parameters.
 Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
@@ -215,19 +213,28 @@ These two elements represent height and width, respectively.
 The input(X) size and output(Out) size may be different.

 Example:
+
  Input:
+
       X shape: $(N, C, H_{in}, W_{in})$
+
  Output:
+
       Out shape: $(N, C, H_{out}, W_{out})$
+
  For ceil_mode = false:
       $$
-       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
+       H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1
+       $$
+       $$
+       W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
  For ceil_mode = true:
       $$
-       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\
-       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
+       H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1
+       $$
+       $$
+       W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1
       $$

 )DOC");

--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -819,17 +819,25 @@ def max_sequence_len(rank_table):


 def lod_tensor_to_array(x, table):
-    """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
+    """ 
+    Convert a LoDTensor to a LoDTensorArray.
+
+    This function splits a LoDTensor into a LoDTensorArray according to its LoD
+    information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in 
+    PaddlePaddle. The generated LoDTensorArray of this function can be further read 
+    or written by `read_from_array()` and `write_to_array()` operators. However, 
+    this function is generally an internal component of PaddlePaddle `DynamicRNN`. 
+    Users should not use it directly.

    Args:
-        x (Variable|list): The LOD tensor to be converted to a LOD tensor array.
+        x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
        table (ParamAttr|list): The variable that stores the level of lod
                                which is ordered by sequence length in
-                                descending order.
+                                descending order. It is generally generated 
+                                by `layers.lod_rank_table()` API.

    Returns:
-        Variable: The variable of type array that has been converted from a
-                  tensor.
+        Variable: The LoDTensorArray that has been converted from the input tensor.

    Examples:
        .. code-block:: python
@@ -1141,6 +1149,13 @@ def array_length(array):


 class ConditionalBlockGuard(BlockGuard):
+    """
+    ConditionalBlockGuard is derived from BlockGuard. It is dedicated to
+    holding a ConditionalBlock and helping users enter and exit the
+    ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard
+    is generally an internal component of IfElse; users should not use it directly.
+    """
+
    def __init__(self, block):
        if not isinstance(block, ConditionalBlock):
            raise TypeError("block should be conditional block")
@@ -1825,26 +1840,26 @@ def reorder_lod_tensor_by_rank(x, rank_table):

 def is_empty(x, cond=None, **ignored):
    """
-    **Is Empty**
-
-    This layer returns the truth value of whether the variable is empty.
+    Test whether a Variable is empty.

    Args:
-        x(Variable): Operand of *is_empty*
-        cond(Variable|None): Optional output variable to store the result
-                             of *is_empty*
+        x (Variable): The Variable to be tested.
+        cond (Variable|None): Output parameter. Returns the test result 
+                              of given 'x'. Default: None

    Returns:
-        Variable: The tensor variable storing the output of *is_empty*.
+        Variable: A bool scalar. True if 'x' is an empty Variable.

    Raises:
        TypeError: If input cond is not a variable, or cond's dtype is
-                   not bool
+                   not bool.

    Examples:
        .. code-block:: python

-          less = fluid.layers.is_empty(x=input)
+          res = fluid.layers.is_empty(x=input)
+          # or:
+          fluid.layers.is_empty(x=input, cond=res)
    """
    helper = LayerHelper("is_empty", **locals())
    if cond is None:

--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -544,6 +544,41 @@ def shuffle(reader, buffer_size):


 def batch(reader, batch_size):
+    """
+    This layer is a reader decorator. It takes a reader and adds
+    'batching' decoration to it. When reading with the resulting
+    decorated reader, output data will be automatically organized
+    into batches.
+
+    Args:
+        reader(Variable): The reader to be decorated with 'batching'.
+        batch_size(int): The batch size.
+
+    Returns:
+        Variable: The reader which has been decorated with 'batching'.
+
+    Examples:
+        .. code-block:: python
+
+            raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
+                                                           './data2.recordio'],
+                                                    shapes=[(3,224,224), (1)],
+                                                    lod_levels=[0, 0],
+                                                    dtypes=['float32', 'int64'],
+                                                    thread_num=2,
+                                                    buffer_size=2)
+            batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5)
+
+            # If we read data with the raw_reader:
+            #     data = fluid.layers.read_file(raw_reader)
+            # We can only get data instance by instance.
+            # 
+            # However, if we read data with the batch_reader:
+            #     data = fluid.layers.read_file(batch_reader)
+            # Every 5 adjacent instances will be automatically combined
+            # into a batch. So what we get ('data') is a batch of data instead
+            # of a single instance.
+    """
    return __create_unshared_decorated_reader__(
        'create_batch_reader', reader, {'batch_size': int(batch_size)})

@@ -589,15 +624,41 @@ def parallel(reader):
                                              {})


-def read_file(file_obj):
+def read_file(reader):
+    """
+    Execute the given reader and get data via it.
+
+    A reader is also a Variable. It can be a raw reader generated by 
+    `fluid.layers.open_files()` or a decorated one generated by 
+    `fluid.layers.double_buffer()` and so on.
+
+    Args:
+
+        reader(Variable): The reader to execute.
+
+    Returns:
+        Tuple[Variable]: Data read via the given reader.
+
+    Examples:
+        .. code-block:: python
+
+            data_file = fluid.layers.open_files(
+                filenames=['mnist.recordio'],
+                shapes=[(-1, 784), (-1, 1)],
+                lod_levels=[0, 0],
+                dtypes=["float32", "int64"])
+            data_file = fluid.layers.double_buffer(
+                fluid.layers.batch(data_file, batch_size=64))
+            input, label = fluid.layers.read_file(data_file)
+    """
    helper = LayerHelper('read_file')
    out = [
        helper.create_tmp_variable(
            stop_gradient=True, dtype='float32')
-        for _ in range(len(file_obj.desc.shapes()))
+        for _ in range(len(reader.desc.shapes()))
    ]
    helper.append_op(
-        type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out})
+        type='read', inputs={'Reader': [reader]}, outputs={'Out': out})
    if len(out) == 1:
        return out[0]
    else:

--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -71,21 +71,40 @@ def noam_decay(d_model, warmup_steps):


 def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
-    """Applies exponential decay to the learning rate.
+    """
+    Applies exponential decay to the learning rate. 
+
+    When training a model, it is often recommended to lower the learning rate as the 
+    training progresses. By using this function, the learning rate will be decayed by 
+    'decay_rate' every 'decay_steps' steps.
+
+    >>> if staircase == True:
+    >>>     decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps)
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)

-    ```python
-    decayed_learning_rate = learning_rate *
-            decay_rate ^ (global_step / decay_steps)
-    ```
    Args:
-        learning_rate: A scalar float32 value or a Variable. This
-          will be the initial learning rate during training
-        decay_steps: A Python `int32` number.
-        decay_rate: A Python `float` number.
-        staircase: Boolean. If set true, decay the learning rate every decay_steps.
+        learning_rate(Variable|float): The initial learning rate.
+        decay_steps(int): See the decay computation above.
+        decay_rate(float): The decay rate. See the decay computation above.
+        staircase(Boolean): If True, decay the learning rate at discrete intervals.
+                            Default: False

    Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
+
+    Examples:
+        .. code-block:: python
+
+          base_lr = 0.1
+          sgd_optimizer = fluid.optimizer.SGD(
+                learning_rate=fluid.layers.exponential_decay(
+                    learning_rate=base_lr,
+                    decay_steps=10000,
+                    decay_rate=0.5,
+                    staircase=True))
+          sgd_optimizer.minimize(avg_cost)
+
    """
    global_step = _decay_step_counter()

@@ -129,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):


 def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
-    """Applies inverse time decay to the initial learning rate.
+    """
+    Applies inverse time decay to the initial learning rate.
+
+    When training a model, it is often recommended to lower the learning rate as the 
+    training progresses. By using this function, an inverse decay function will be 
+    applied to the initial learning rate.

-    >>> if staircase:
+    >>> if staircase == True:
    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
    >>> else:
    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)

    Args:
-        learning_rate: A scalar float32 value or a Variable. This
-          will be the initial learning rate during training.
-        decay_steps: A Python `int32` number.
-        decay_rate: A Python `float` number.
-        staircase: Boolean. If set true, decay the learning rate every decay_steps.
+        learning_rate(Variable|float): The initial learning rate.
+        decay_steps(int): See the decay computation above.
+        decay_rate(float): The decay rate. See the decay computation above.
+        staircase(Boolean): If True, decay the learning rate at discrete intervals.
+                            Default: False

    Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
+
+    Examples:
+        .. code-block:: python
+
+          base_lr = 0.1
+          sgd_optimizer = fluid.optimizer.SGD(
+                learning_rate=fluid.layers.inverse_time_decay(
+                    learning_rate=base_lr,
+                    decay_steps=10000,
+                    decay_rate=0.5,
+                    staircase=True))
+          sgd_optimizer.minimize(avg_cost)
    """
    global_step = _decay_step_counter()


--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -106,14 +106,15 @@ def fc(input,
    """
    **Fully Connected Layer**

-    The fully connected layer can take multiple tensors as its inputs. It
-    creates a variable called weights for each input tensor, which represents
-    a fully connected weight matrix from each input unit to each output unit.
-    The fully connected layer multiplies each input tensor with its coresponding
-    weight to produce an output Tensor. If multiple input tensors are given,
-    the results of multiple multiplications will be sumed up. If bias_attr is
-    not None, a bias variable will be created and added to the output. Finally,
-    if activation is not None, it will be applied to the output as well.
+    This function creates a fully connected layer in the network. It can take 
+    multiple tensors as its inputs. It creates a variable called weights for 
+    each input tensor, which represents a fully connected weight matrix from 
+    each input unit to each output unit. The fully connected layer multiplies 
+    each input tensor with its corresponding weight to produce an output Tensor.
+    If multiple input tensors are given, the results of multiple multiplications
+    will be summed up. If bias_attr is not None, a bias variable will be created
+    and added to the output. Finally, if activation is not None, it will be applied 
+    to the output as well.

    This process can be formulated as follows:

@@ -154,7 +155,7 @@ def fc(input,
        name (str, default None): The name of this layer.

    Returns:
-        A tensor variable storing the transformation result.
+        Variable: The transformation result.

    Raises:
        ValueError: If rank of the input tensor is less than 2.
@@ -162,8 +163,7 @@ def fc(input,
    Examples:
        .. code-block:: python

-          data = fluid.layers.data(
-              name="data", shape=[32, 32], dtype="float32")
+          data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
          fc = fluid.layers.fc(input=data, size=1000, act="tanh")
    """

@@ -911,7 +911,7 @@ def cos_sim(X, Y):
    Args:
        X (Variable): The input X.
        Y (Variable): The input Y.
-    
+
    Returns:
        Variable: the output of cosine(X, Y).
    """
@@ -1117,7 +1117,7 @@ def chunk_eval(input,
        chunk_scheme (str): ${chunk_scheme_comment}
        num_chunk_types (int): ${num_chunk_types_comment}
        excluded_chunk_types (list): ${excluded_chunk_types_comment}
-    
+
    Returns:
        tuple: tuple containing: (precision, recall, f1_score,
               num_infer_chunks, num_label_chunks,
@@ -1177,7 +1177,7 @@ def sequence_conv(input,
        bias_attr (ParamAttr|None): attributes for bias
        param_attr (ParamAttr|None): attributes for parameter
        act (str): the activation type
-    
+
    Returns:
        Variable: output of sequence_conv
    """
@@ -1740,6 +1740,7 @@ def sequence_last_step(input):
    return sequence_pool(input=input, pool_type="last")


+@templatedoc()
 def pool2d(input,
           pool_size=-1,
           pool_type="max",
@@ -1751,24 +1752,45 @@ def pool2d(input,
           use_mkldnn=False,
           name=None):
    """
-    This function adds the operator for pooling in 2 dimensions, using the
-    pooling configurations mentioned in input parameters.
+    ${comment}

    Args:
-        input (Variable): ${input_comment}
-        pool_size (int): ${ksize_comment}
-        pool_type (str): ${pooling_type_comment}
+        input (Variable): The input tensor of pooling operator. The format of 
+                          input tensor is NCHW, where N is batch size, C is 
+                          the number of channels, H is the height of the 
+                          feature, and W is the width of the feature.
+        pool_size (int): The side length of pooling windows. All pooling 
+                         windows are squares with pool_size on a side.
+        pool_type: ${pooling_type_comment}
        pool_stride (int): stride of the pooling layer.
        pool_padding (int): padding size.
-        global_pooling (bool): ${global_pooling_comment}
-        use_cudnn (bool): ${use_cudnn_comment}
-        ceil_mode (bool): ${ceil_mode_comment}
-        use_mkldnn (bool): ${use_mkldnn_comment}
-        name (str): A name for this layer(optional). If set None, the layer
-            will be named automatically.
-    
+        global_pooling: ${global_pooling_comment}
+        use_cudnn: ${use_cudnn_comment}
+        ceil_mode: ${ceil_mode_comment}
+        use_mkldnn: ${use_mkldnn_comment}
+        name (str|None): A name for this layer(optional). If set None, the 
+                        layer will be named automatically.
+
    Returns:
-        Variable: output of pool2d layer.
+        Variable: The pooling result.
+
+    Raises:
+        ValueError: If 'pool_type' is neither "max" nor "avg"
+        ValueError: If 'global_pooling' is False and 'pool_size' is -1
+        ValueError: If 'use_cudnn' is not a bool value.
+
+    Examples:
+
+        .. code-block:: python
+
+          data = fluid.layers.data(
+              name='data', shape=[3, 32, 32], dtype='float32')
+          conv2d = fluid.layers.pool2d(
+                            input=data, 
+                            pool_size=2, 
+                            pool_type='max', 
+                            pool_stride=1, 
+                            global_pooling=False)
    """
    if pool_type not in ["max", "avg"]:
        raise ValueError(
@@ -2103,7 +2125,7 @@ def beam_search_decode(ids, scores, name=None):
        ids (Variable): ${ids_comment}
        scores (Variable): ${scores_comment}
        name (str): The name of this layer. It is optional.
-    
+
    Returns:
        tuple: a tuple of two output variable: sentence_ids, sentence_scores
    """
@@ -2537,7 +2559,7 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0):
        beam_size (int): ${beam_size_comment}
        end_id (int): ${end_id_comment}
        level (int): ${level_comment}
-    
+
    Returns:
        tuple: a tuple of beam_search output variables: selected_ids, selected_scores
    '''
@@ -3195,25 +3217,51 @@ def topk(input, k, name=None):
    This operator is used to find values and indices of the k largest entries
    for the last dimension.

-    If the input is a vector (rank=1), finds the k largest entries in the vector
+    If the input is a vector (1-D Tensor), finds the k largest entries in the vector
    and outputs their values and indices as vectors. Thus values[j] is the j-th
    largest entry in input, and its index is indices[j].

    If the input is a Tensor with higher rank, this operator computes the top k
    entries along the last dimension.

+    For example:
+
+    .. code-block:: text
+
+        If:
+            input = [[5, 4, 2, 3],
+                     [9, 7, 10, 25],
+                     [6, 2, 10, 1]]
+            k = 2
+
+        Then:
+            The first output:
+            values = [[5, 4],
+                      [10, 25],
+                      [6, 10]]
+
+            The second output:
+            indices = [[0, 1],
+                       [2, 3],
+                       [0, 2]]
+
    Args:
        input(Variable): The input variable which can be a vector or Tensor with
            higher rank.
-        k(int): An integer value to specify the top k largest elements.
+        k(int):  The number of top elements to look for along the last dimension 
+                 of input.
        name(str|None): A name for this layer(optional). If set None, the layer
-                       will be named automatically.
+                       will be named automatically. 
+                       Default: None

    Returns:
-        values(Variable): The k largest elements along each last dimensional
-            slice.
-        indices(Variable): The indices of values within the last dimension of
-            input.
+        Tuple[Variable]: A tuple with two elements. Each element is a Variable. 
+        The first one is k largest elements along each last 
+        dimensional slice. The second one is indices of values 
+        within the last dimension of input.
+
+    Raises:
+        ValueError: If k < 1 or k is not less than the last dimension of input

    Examples:
        .. code-block:: python
@@ -3221,7 +3269,7 @@ def topk(input, k, name=None):
            top5_values, top5_indices = layers.topk(input, k=5)
    """
    shape = input.shape
-    if k < 1 and k >= shape[-1]:
+    if k < 1 or k >= shape[-1]:
        raise ValueError("k must be greater than 0 and less than %d." %
                         (shape[-1]))

@@ -3523,7 +3571,7 @@ def nce(input,
        param_attr (ParamAttr|None): attributes for parameter
        bias_attr (ParamAttr|None): attributes for bias
        num_neg_samples (int): ${num_neg_samples_comment}
-    
+
    Returns:
        Variable: The output nce loss.


--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -89,16 +89,29 @@ def create_global_var(shape,
                      force_cpu=False,
                      name=None):
    """
-    Create a global variable. such as global_step
+    Create a new variable in the global block(block 0).
+
    Args:
        shape(list[int]): shape of the variable
-        value(float): the value of the variable
-        dtype(string): element type of the parameter
-        persistable(bool): if this variable is persistable
-        force_cpu(bool): force this variable to be on CPU
+        value(float): the value of the variable. The newly created
+                      variable will be filled with it.
+        dtype(string): data type of the variable
+        persistable(bool): if this variable is persistable. 
+                           Default: False
+        force_cpu(bool): force this variable to be on CPU. 
+                         Default: False
+        name(str|None): The name of the variable. If set to None the variable 
+                        name will be generated automatically. 
+                        Default: None

    Returns:
        Variable: the created Variable
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.layers.create_global_var(shape=[2,3], value=1.0, dtype='float32',
+                                 persistable=True, force_cpu=True, name='new_var')
    """
    helper = LayerHelper("global_var", **locals())
    var = helper.create_global_variable(
@@ -156,7 +169,8 @@ def concat(input, axis=0, name=None):

    Examples:
        .. code-block:: python
-          out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
+        
+           out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
    """
    helper = LayerHelper('concat', **locals())
    out = helper.create_tmp_variable(dtype=helper.input_dtype())
@@ -169,19 +183,21 @@ def concat(input, axis=0, name=None):


 def sums(input, out=None):
-    """This function performs the sum operation on the input and returns the
+    """
+    This function performs the sum operation on the input and returns the
    result as the output.

    Args:
        input (Variable|list): The input tensor that has the elements
                               that need to be summed up.
+        out (Variable|None): Output parameter. The sum result.
+                             Default: None

    Returns:
-        Variable: The tensor type variable that has the sum of input
-                  written to it.
+        Variable: the sum of input. The same as the argument 'out'

    Examples:
-        .. code-block::python
+        .. code-block:: python

          tmp = fluid.layers.zeros(shape=[10], dtype='int32')
          i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
@@ -352,13 +368,13 @@ def argmin(x, axis=0):
        x(Variable): The input to compute the indices of
                     the min elements.
        axis(int): Axis to compute indices along.
-    
+
    Returns:
        Variable: The tensor variable storing the output
-    
+
    Examples:
        .. code-block:: python
-          
+
          out = fluid.layers.argmin(x=in, axis=0)
          out = fluid.layers.argmin(x=in, axis=-1)  
    """
@@ -383,13 +399,13 @@ def argmax(x, axis=0):
        x(Variable): The input to compute the indices of
                     the max elements.
        axis(int): Axis to compute indices along.
-    
+
    Returns:
        Variable: The tensor variable storing the output
-    
+
    Examples:
        .. code-block:: python
-          
+
          out = fluid.layers.argmax(x=in, axis=0)
          out = fluid.layers.argmax(x=in, axis=-1)  
    """