Merge pull request #11383 from jacquesqiao/update-api-reference-1

update split_lod_tensor, create_array and array_length doc

Merge pull request #11383 from jacquesqiao/update-api-reference-1
update split_lod_tensor, create_array and array_length doc
16a0f746 · Yu Yang · GitHub · ce60bbf5 · 46ae1c93 · 16a0f746
9 changed files
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -133,7 +133,7 @@ $out = \max(x, 0)$
 __attribute__((unused)) constexpr char TanhDoc[] = R"DOC(
 Tanh Activation Operator.
-$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
+$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
 )DOC";

--- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
@@ -83,11 +83,13 @@ class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
    AddComment(R"DOC(
 PolygonBoxTransform Operator.
+PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
 The input is the final geometry output in detection network.
 We use 2*n numbers to denote the coordinate shift from n corner vertices of
 the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi),
 the geometry output contains 2*n channels.
-PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
 )DOC");
  }
 };

--- a/paddle/fluid/operators/shape_op.cc
+++ b/paddle/fluid/operators/shape_op.cc
@@ -36,10 +36,13 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Input", "(Tensor), The input tensor.");
-    AddOutput("Out", "(Tensor), The shape of input tensor.");
+    AddOutput("Out",
+              "(Tensor), The shape of input tensor, the data type of the shape"
+              " is int64_t, will be on the same device with the input Tensor.");
    AddComment(R"DOC(
-Shape Operator. 
+Shape Operator
-Get the shape of input tensor.
+Get the shape of input tensor. Only support CPU input Tensor now.
 )DOC");
  }
 };

--- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc
+++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc
@@ -113,14 +113,14 @@ The logistic loss is given as follows:
       $$loss = -Labels * \log(\sigma(X)) - (1 - Labels) * \log(1 - \sigma(X))$$
-We know that $$\sigma(X) = (1 / (1 + \exp(-X)))$$. By substituting this we get:
+We know that $$\sigma(X) = \\frac{1}{1 + \exp(-X)}$$. By substituting this we get:
       $$loss = X - X * Labels + \log(1 + \exp(-X))$$
 For stability and to prevent overflow of $$\exp(-X)$$ when X < 0,
 we reformulate the loss as follows:
-       $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-|X|))$$
+       $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-\|X\|))$$
 Both the input `X` and `Labels` can carry the LoD (Level of Details) information.
 However the output only shares the LoD with input `X`.

--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -55,34 +55,36 @@ __all__ = [
 def split_lod_tensor(input, mask, level=0):
    """
-    **split_lod_tensor**
    This function takes in an input that contains the complete lod information,
    and takes in a mask which is used to mask certain parts of the input.
    The output is the true branch and the false branch with the mask applied to
-    the input at a certain level in the tensor.
+    the input at a certain level in the tensor. Mainly used in IfElse to split
+    data into two parts.
    Args:
        input(tuple|list|None): The input tensor that contains complete
                                lod information needed to construct the output.
        mask(list): A bool column vector which masks the input.
-        level(int): The specific lod level to rank.
+        level(int): The specific lod level to split.
    Returns:
-        Variable: The true branch of tensor as per the mask applied to input.
+        tuple(Variable, Variable):
-        Variable: The false branch of tensor as per the mask applied to input.
+        The true branch of tensor as per the mask applied to input.
+        The false branch of tensor as per the mask applied to input.
    Examples:
        .. code-block:: python
-          x = layers.data(name='x', shape=[1])
+          x = fluid.layers.data(name='x', shape=[1])
          x.persistable = True
-          y = layers.data(name='y', shape=[1])
+          y = fluid.layers.data(name='y', shape=[1])
          y.persistable = True
-          out_true, out_false = layers.split_lod_tensor(
+          out_true, out_false = fluid.layers.split_lod_tensor(
                input=x, mask=y, level=level)
    """
    helper = LayerHelper('split_lod_tensor', **locals())
    out_true = helper.create_tmp_variable(dtype=input.dtype)
@@ -105,8 +107,9 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
    This function takes in an input :math:`x`, the True branch, the False
    branch and a binary :math:`mask`. Using this information, this function
-    merges the True and False branches of the tensor into a single Output
+    merges the True and False branches of the tensor into a single tensor as
-    at a certain lod level indiacted by :math:`level`.
+    output at a certain lod level indicated by :math:`level`. Used in IfElse
+    to merge the output of the True block and the False block.
    Args:
        in_true(tuple|list|None): The True branch to be merged.
@@ -114,7 +117,7 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
        x(tuple|list|None): The input tensor that contains complete
                            lod information needed to construct the output.
        mask(list): A bool column vector which masks the input.
-        level(int): The specific lod level to rank.
+        level(int): The specific lod level to merge.
    Returns:
        Variable: The merged output tensor.
@@ -965,14 +968,17 @@ def array_write(x, i, array=None):
 def create_array(dtype):
-    """This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
+    """
-    LayerHelper.
+    **Create LoDTensorArray**
+    This function creates an array of type LOD_TENSOR_ARRAY. It is mainly used to
+    implement RNN with array_write, array_read and While.
    Args:
-        dtype (int|float): The data type of the elements in the array.
+        dtype (int|float): The data type of the elements in the lod_tensor_array.
    Returns:
-        Variable: The tensor variable storing the elements of data type.
+        Variable: The lod_tensor_array variable storing the elements of data type.
    Examples:
        .. code-block:: python
@@ -1083,10 +1089,9 @@ def array_read(array, i):
    Examples:
        .. code-block:: python
-            tmp = fluid.layers.zeros(shape=[10], dtype='int32')
+          tmp = fluid.layers.zeros(shape=[10], dtype='int32')
-            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
+          i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
-            arr = fluid.layers.array_read(tmp, i=i)
+          arr = fluid.layers.array_read(tmp, i=i)
    """
    helper = LayerHelper('array_read', **locals())
    if not isinstance(
@@ -1140,9 +1145,14 @@ def shrink_memory(x, i, table):
 def array_length(array):
-    """This function performs the operation to find the length of the input
+    """
+    **Get the Length of Input LoDTensorArray**
+    This function performs the operation to find the length of the input
    LOD_TENSOR_ARRAY.
+    Related API: array_read, array_write, While.
    Args:
        array (LOD_TENSOR_ARRAY): The input array that will be used
                                  to compute the length.
@@ -1151,12 +1161,13 @@ def array_length(array):
        Variable: The length of the input LoDTensorArray.
    Examples:
-        .. code-block::python
+        .. code-block:: python
          tmp = fluid.layers.zeros(shape=[10], dtype='int32')
          i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
          arr = fluid.layers.array_write(tmp, i=i)
          arr_len = fluid.layers.array_length(arr)
    """
    helper = LayerHelper('array_length', **locals())
    tmp = helper.create_tmp_variable(dtype='int64')
@@ -1247,6 +1258,42 @@ class ConditionalBlock(object):
 class Switch(object):
+    """
+    Switch class works just like an `if-elif-else`. Can be used in learning rate scheduler
+    to modify learning rate.
+    The Semantics:
+    1. A `switch` control-flow checks cases one-by-one.
+    2. The condition of each case is a boolean value, which is a scalar Variable.
+    3. It runs the first matched case, or the default case if there is one.
+    4. Once it matches a case, it runs the corresponding branch and only that branch.
+    Examples:
+        .. code-block:: python
+            lr = fluid.layers.tensor.create_global_var(
+                shape=[1],
+                value=0.0,
+                dtype='float32',
+                persistable=True,
+                name="learning_rate")
+            one_var = fluid.layers.tensor.fill_constant(
+                shape=[1], dtype='float32', value=1.0)
+            two_var = fluid.layers.tensor.fill_constant(
+                shape=[1], dtype='float32', value=2.0)
+            with fluid.layers.control_flow.Switch() as switch:
+                with switch.case(global_step == zero_var):
+                    fluid.layers.tensor.assign(input=one_var, output=lr)
+                with switch.default():
+                    fluid.layers.tensor.assign(input=two_var, output=lr)
+    """
    def __init__(self, name=None):
        self.helper = LayerHelper('switch', name=name)
        self.inside_scope = False
@@ -1276,7 +1323,8 @@ class Switch(object):
        return ConditionalBlockGuard(cond_block)
    def default(self):
-        """create a default case for this switch
+        """
+        create a default case for this switch
        """
        pre_cond_num = len(self.pre_not_conditions)
        if pre_cond_num == 0:

--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -620,7 +620,7 @@ def prior_box(input,
              offset=0.5,
              name=None):
    """
-    **Prior box operator**
+    **Prior Box Operator**
    Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
    Each position of the input produce N prior boxes, N is determined by
@@ -649,26 +649,30 @@ def prior_box(input,
       name(str): Name of the prior box op. Default: None.
    Returns:
-        boxes(Variable): the output prior boxes of PriorBox.
+        tuple: A tuple with two Variables (boxes, variances)
-             The layout is [H, W, num_priors, 4].
-             H is the height of input, W is the width of input,
+        boxes: the output prior boxes of PriorBox.
-             num_priors is the total
+        The layout is [H, W, num_priors, 4].
-             box count of each position of input.
+        H is the height of input, W is the width of input,
-        Variances(Variable): the expanded variances of PriorBox.
+        num_priors is the total
-             The layout is [H, W, num_priors, 4].
+        box count of each position of input.
-             H is the height of input, W is the width of input
-             num_priors is the total
+        variances: the expanded variances of PriorBox.
-             box count of each position of input
+        The layout is [H, W, num_priors, 4].
+        H is the height of input, W is the width of input
+        num_priors is the total
+        box count of each position of input
    Examples:
        .. code-block:: python
-            box, var = prior_box(
-            input=conv1,
+            box, var = fluid.layers.prior_box(
-            image=images,
+                input=conv1,
-            min_sizes=[100.],
+                image=images,
-            flip=True,
+                min_sizes=[100.],
-            clip=True)
+                flip=True,
+                clip=True)
    """
    helper = LayerHelper("prior_box", **locals())
    dtype = helper.input_dtype()
@@ -738,11 +742,9 @@ def multi_box_head(inputs,
                   stride=1,
                   name=None):
    """
-    **Prior_boxes**
    Generate prior boxes for SSD(Single Shot MultiBox Detector)
    algorithm. The details of this algorithm, please refer the
-    section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector)
+    section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
    <https://arxiv.org/abs/1512.02325>`_ .
    Args:
@@ -783,24 +785,27 @@ def multi_box_head(inputs,
       name(str): Name of the prior box layer. Default: None.
    Returns:
-        mbox_loc(Variable): The predicted boxes' location of the inputs.
+        tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
-             The layout is [N, H*W*Priors, 4]. where Priors
-             is the number of predicted boxes each position of each input.
+        mbox_loc: The predicted boxes' location of the inputs. The layout
-        mbox_conf(Variable): The predicted boxes' confidence of the inputs.
+        is [N, H*W*Priors, 4]. where Priors is the number of predicted
-             The layout is [N, H*W*Priors, C]. where Priors
+        boxes each position of each input.
-             is the number of predicted boxes each position of each input
-             and C is the number of Classes.
+        mbox_conf: The predicted boxes' confidence of the inputs. The layout
-        boxes(Variable): the output prior boxes of PriorBox.
+        is [N, H*W*Priors, C]. where Priors is the number of predicted boxes
-             The layout is [num_priors, 4]. num_priors is the total
+        each position of each input and C is the number of Classes.
-             box count of each position of inputs.
-        Variances(Variable): the expanded variances of PriorBox.
+        boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4].
-             The layout is [num_priors, 4]. num_priors is the total
+        num_priors is the total box count of each position of inputs.
-             box count of each position of inputs
+        variances: the expanded variances of PriorBox. The layout is
+        [num_priors, 4]. num_priors is the total box count of each position of inputs
    Examples:
        .. code-block:: python
-          mbox_locs, mbox_confs, box, var = layers.multi_box_head(
+          mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
            inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
            image=images,
            num_classes=21,

--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -199,25 +199,28 @@ def polynomial_decay(learning_rate,
                     end_learning_rate=0.0001,
                     power=1.0,
                     cycle=False):
-    """Applies polynomial decay to the initial learning rate.
+    """
+    Applies polynomial decay to the initial learning rate.
+    .. code-block:: python
+     if cycle:
+       decay_steps = decay_steps * ceil(global_step / decay_steps)
+     else:
+       global_step = min(global_step, decay_steps)
+     decayed_learning_rate = (learning_rate - end_learning_rate) *
+          (1 - global_step / decay_steps) ^ power + end_learning_rate
-    >>> if cycle:
-    >>>     decay_steps = decay_steps * ceil(global_step / decay_steps)
-    >>> else:
-    >>>     global_step = min(global_step, decay_steps)
-    >>> decayed_learning_rate = (learning_rate - end_learning_rate) *
-    >>>                   (1 - global_step / decay_steps) ^ power +
-    >>>                   end_learning_rate
    Args:
-        learning_rate: A scalar float32 value or a Variable. This
+        learning_rate(Variable|float32): A scalar float32 value or a Variable. This
-          will be the initial learning rate during training
+          will be the initial learning rate during training.
-        decay_steps: A Python `int32` number.
+        decay_steps(int32): A Python `int32` number.
-        end_learning_rate: A Python `float` number.
+        end_learning_rate(float): A Python `float` number.
-        power: A Python `float` number
+        power(float): A Python `float` number.
-        cycle: Boolean. If set true, decay the learning rate every decay_steps.
+        cycle(bool): If set true, decay the learning rate every decay_steps.
    Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
    """
    global_step = _decay_step_counter()

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1290,6 +1290,45 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
 def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None):
+    """
+    The input of the softmax layer is a 2-D tensor with shape N x K (N is the
+    batch_size, K is the dimension of input feature). The output tensor has the
+    same shape as the input tensor.
+    For each row of the input tensor, the softmax operator squashes the
+    K-dimensional vector of arbitrary real values to a K-dimensional vector of real
+    values in the range [0, 1] that add up to 1.
+    It computes the exponential of the given dimension and the sum of exponential
+    values of all the dimensions in the K-dimensional vector input.
+    Then the ratio of the exponential of the given dimension and the sum of
+    exponential values of all the dimensions is the output of the softmax
+    operator.
+    For each row :math:`i` and each column :math:`j` in Input(X), we have:
+    .. math::
+        Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j \\exp(X[i, j])}
+    Args:
+        input (Variable): The input variable.
+        bias_attr (ParamAttr): attributes for bias
+        param_attr (ParamAttr): attributes for parameter
+        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
+        library is installed.
+    Returns:
+        Variable: output of softmax
+    Examples:
+        .. code-block:: python
+             fc = fluid.layers.fc(input=x, size=10)
+             softmax = fluid.layers.softmax(input=fc)
+    """
    helper = LayerHelper('softmax', **locals())
    dtype = helper.input_dtype()
    softmax_out = helper.create_tmp_variable(dtype)
@@ -1951,27 +1990,57 @@ def batch_norm(input,
               moving_variance_name=None,
               do_model_average_for_mean_and_var=False):
    """
-    This function helps create an operator to implement
+    **Batch Normalization Layer**
-    the BatchNorm layer using the configurations from the input parameters.
+    Can be used as a normalizer function for conv2d and fully_connected operations.
+    The required data format for this layer is one of the following:
+    1. NHWC `[batch, in_height, in_width, in_channels]`
+    2. NCHW `[batch, in_channels, in_height, in_width]`
+    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
+    Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
+    for more details.
+    :math:`input` is the input features over a mini-batch.
+    ..  math::
+        \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
+        \ mini-batch\ mean \\\\
+        \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
+        \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
+        \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
+        \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
+        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
    Args:
-        input (Variable): the input variable.
+        input(variable): The input variable which is a LoDTensor.
-        act (str): activation type
+        act(string, Default None): Activation type, linear|relu|prelu|...
-        is_test (bool): whether to run batch_norm as test mode.
+        is_test(bool, Default False): Whether to run batch_norm in test mode (True) or training mode (False).
-        momentum (float): momentum
+        momentum(float, Default 0.9):
-        epsilon (float): epsilon, default 1e-05
+        epsilon(float, Default 1e-05):
-        param_attr (ParamAttr|None): attributes for parameter
+        param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
-        bias_attr (ParamAttr|None): attributes for bias
+        bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
-        data_layout (str): data layout, default NCHW
+        data_layout(string, default NCHW): NCHW|NHWC
-        in_place (bool): if True, do not create tmp variable
+        in_place(bool, Default False): Make the input and output of batch norm reuse memory.
-        use_mkldnn (bool): ${use_mkldnn_comment}
+        use_mkldnn(bool, Default false): ${use_mkldnn_comment}
-        name (str): The name of this layer. It is optional.
+        name(string, Default None): A name for this layer(optional). If set None, the layer
-        moving_mean_name (str): The name of moving mean variable name, optional.
+            will be named automatically.
-        moving_variance_name (str): The name of moving variance name, optional.
+        moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
-        do_model_average_for_mean_and_var (bool):
+        moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance.
+        do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
    Returns:
-        Variable: output of batch_norm layer.
+        Variable: A tensor variable which is the result after applying batch normalization on the input.
+    Examples:
+        .. code-block:: python
+            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+            hidden2 = fluid.layers.batch_norm(input=hidden1)
    """
    helper = LayerHelper('batch_norm', **locals())
    dtype = helper.input_dtype()
@@ -4599,12 +4668,13 @@ def image_resize(input,
                 name=None,
                 resample='BILINEAR'):
    """
-    Resize a batch of images.
+    **Resize a Batch of Images**
    The input must be a tensor of the shape (num_batches, channels, in_h, in_w), 
    and the resizing only applies on the last two dimensions(height and width).
    Supporting resample methods:
        'BILINEAR' : Bilinear interpolation
    Args:
@@ -4624,8 +4694,8 @@ def image_resize(input,
                       Default: 'BILINEAR'
    Returns:
-        out (Variable): The output is a 4-D tensor of the shape
+        Variable: The output is a 4-D tensor of the shape
-                        (num_batches, channls, out_h, out_w).
+        (num_batches, channels, out_h, out_w).
    Examples:
        .. code-block:: python
@@ -4709,8 +4779,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
        resample (str): resample method, default: BILINEAR.
    Returns:
-        out (Variable): The output is a 4-D tensor of the shape
+        Variable: The output is a 4-D tensor of the shape
-                        (num_batches, channls, out_h, out_w).
+        (num_batches, channels, out_h, out_w).
    """
    in_shape = input.shape
    if len(in_shape) != 4:
@@ -4729,6 +4799,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
 def gather(input, index):
    """
+    **Gather Layer**
    Output is obtained by gathering entries of the outer-most dimension 
    of X indexed by `index` and concatenate them together.

--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -35,10 +35,29 @@ __all__ = [
    'argmax',
    'ones',
    'zeros',
+    'reverse',
 ]
 def create_tensor(dtype, name=None, persistable=False):
+    """
+    Create a variable, which will hold a LoDTensor with data type dtype.
+    Args:
+        dtype(string): 'float32'|'int32'|..., the data type of the
+            created tensor.
+        name(string): The name of the created tensor, if not set,
+            the name will be a random unique one.
+        persistable(bool): Set the persistable flag of the created tensor.
+    Returns:
+        Variable: The tensor variable storing the created tensor.
+    Examples:
+        .. code-block:: python
+          tensor = fluid.layers.create_tensor(dtype='float32')
+    """
    helper = LayerHelper("create_tensor", **locals())
    return helper.create_variable(
        name=helper.name, dtype=dtype, persistable=persistable)