Commit 2955ff58 authored by: Y yuyang18

Polish documentation

* row_conv
* uniform_random
* layer_norm
* create_parameter
* hard_shrink
* ssd_loss
Parent 9328c3cf
......@@ -276,13 +276,12 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
HardShrink Activation Operator.
.. math::

    out = \begin{cases}
            x, \text{if } x > \lambda \\
            x, \text{if } x < -\lambda \\
            0, \text{otherwise}
          \end{cases}
)DOC");
}
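A minimal Python usage sketch (assuming the generated `fluid.layers.hard_shrink` wrapper, whose `threshold` attribute plays the role of :math:`\lambda` above):

>>> import paddle.fluid as fluid
>>> # entries with |x| <= threshold are zeroed out; the rest pass through
>>> data = fluid.layers.data(name="input", shape=[784])
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)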
......
......@@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel {
class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(LoDTensor) The input tensor.");
AddInput("X", "The input tensor.");
AddInput("Scale",
"(Tensor, optional) Scale is a 1-dimensional tensor of size "
"(optional) Scale is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.")
.AsDispensable();
AddInput("Bias",
"(Tensor, optional) Bias is a 1-dimensional tensor of size "
"(optional) Bias is a 1-dimensional tensor of size "
"H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
"It is applied to the output.")
.AsDispensable();
AddOutput("Y", "(LoDTensor) Result after normalization.");
AddOutput("Mean", "(Tensor) Mean of the current mini batch.")
.AsIntermediate();
AddOutput("Variance", "(Tensor) Variance of the current mini batch.")
AddOutput("Y", "Result after normalization.");
AddOutput("Mean", "Mean of the current mini batch.").AsIntermediate();
AddOutput("Variance", "Variance of the current mini batch.")
.AsIntermediate();
AddAttr<float>("epsilon",
"(float, default 1e-5) Constant for "
"numerical stability")
"Constant for numerical stability [default 1e-5].")
.SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f,
"'epsilon' should be between 0.0 and 0.001.");
});
AddAttr<int>("begin_norm_axis",
"(int default:1), the "
"axis of `begin_norm_axis ... Rank(X) - 1` will be "
"the axis of `begin_norm_axis ... Rank(X) - 1` will be "
"normalized. `begin_norm_axis` splits the tensor(`X`) to a "
"matrix [N,H].")
"matrix [N,H]. [default 1].")
.SetDefault(1)
.AddCustomChecker([](const int &begin_norm_axis) {
PADDLE_ENFORCE_GT(begin_norm_axis, 0,
......@@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
});
AddComment(R"DOC(
Layer Normalization.
Assume feature vectors exist on dimensions
:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
along these dimensions for each feature vector :math:`a` with size
:math:`H`, then normalize each feature vector using the corresponding
statistics. After that, apply learnable gain and bias on the normalized
tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
)DOC");
}
};
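For reference, the statistics described above written out as formulas (a sketch consistent with the :math:`h = f(\frac{g}{\sigma}(a - \mu) + b)` form used in the Python docstring later in this commit; `epsilon` is added to the variance for numerical stability):

.. math::

    \mu = \frac{1}{H}\sum_{i=1}^{H} a_i, \qquad
    \sigma = \sqrt{\frac{1}{H}\sum_{i=1}^{H} (a_i - \mu)^2 + \epsilon}, \qquad
    h = f\left(\frac{g}{\sigma}(a - \mu) + b\right)

Here :math:`g` and :math:`b` are the learnable `Scale` and `Bias` inputs, and :math:`f` is the optional activation.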
......
......@@ -78,18 +78,18 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(LoDTensor), the input(X) is a LodTensor, which supports "
"the input(X) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor "
"in this LoDTensor is a matrix with shape (T x N), where T "
"is the total time steps in this mini-batch and N is the input "
"data dimension.");
AddInput("Filter",
"(Tensor), the input(Filter) is a learnable parameter. It "
"the input(Filter) is a learnable parameter. It "
"is a 2-D tensor with shape (future_context x N), where, "
"future_context is the future context length and N is the data "
"dimension.");
AddOutput("Out",
"(LoDTensor), the output(Out) is a LodTensor, which supports "
"the output(Out) is a LodTensor, which supports "
"variable time-length input sequences. The underlying tensor "
"in this LodTensor is a matrix with shape T x N, i.e., the "
"same shape as X.");
......@@ -117,6 +117,20 @@ $$
Out_{i, :} = \sum_{j=i}^{i + \tau} X_{j, :} \odot W_{j-i, :}
$$
In the above equation:

* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $\tau$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{j-i}$: The (j-i)-th row of parameters with shape [1, D].

For more details about row_conv, please refer to the paper
(http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and the
design document
https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
)DOC");
}
};
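To make the indexing concrete, a small worked instance of the equation above (illustrative assumption: future context $\tau = 2$, so the filter has three rows); the first output row mixes the current input row with the two rows that follow it:

$$
Out_{0} = X_{0} \odot W_{0} + X_{1} \odot W_{1} + X_{2} \odot W_{2}
$$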
......
......@@ -86,32 +86,26 @@ class UniformRandomOp : public framework::OperatorWithKernel {
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddOutput("Out", "(Tensor) The output tensor of uniform random op");
AddOutput("Out", "The output tensor of uniform random op");
AddComment(R"DOC(
Uniform random operator.
This operator initializes a tensor with random values sampled from a
uniform distribution. The generated values lie in the interval [min, max].
)DOC");
AddAttr<std::vector<int>>("shape",
"(vector<int>) The shape of the output tensor");
AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
AddAttr<std::vector<int>>("shape", "The shape of the output tensor");
AddAttr<float>("min", "Minimum value of uniform random. [default -1.0].")
.SetDefault(-1.0f);
AddAttr<float>("max",
"(float, default 1.0) "
"Maximun value of uniform random")
AddAttr<float>("max", "Maximun value of uniform random. [default 1.0].")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
"Random seed used for generating samples. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time.")
"generate the same random numbers every time. [default 0].")
.SetDefault(0);
AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type")
AddAttr<int>("dtype", "Output tensor data type. [default 5(FP32)].")
.SetDefault(framework::proto::VarType::FP32);
}
};
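A Python usage sketch (assuming a generated `fluid.layers.uniform_random` wrapper that exposes the attributes above):

>>> import paddle.fluid as fluid
>>> # a 2 x 3 tensor with entries drawn from U(-1.0, 1.0); a non-zero
>>> # seed makes the op return the same numbers on every run
>>> out = fluid.layers.uniform_random(shape=[2, 3], min=-1.0, max=1.0, seed=1)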
......
......@@ -373,22 +373,55 @@ def ssd_loss(location,
confidence loss (or classification loss) by performing the following steps:
1. Find matched bounding box by bipartite matching algorithm.
  1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
  1.2 Compute matched bounding box by bipartite matching algorithm.
2. Compute confidence for mining hard examples.
  2.1. Get the target label based on matched indices.
  2.2. Compute confidence loss.
3. Apply hard example mining to get the negative example indices and update
   the matched indices.
4. Assign classification and regression targets.
  4.1. Encode the bounding boxes according to the prior boxes.
  4.2. Assign regression targets.
  4.3. Assign classification targets.
5. Compute the overall objective loss.
  5.1 Compute confidence loss.
  5.2 Compute localization loss.
  5.3 Compute the overall weighted loss.
>>> import paddle.fluid.layers as layers
>>> pb = layers.data(
>>> name='prior_box',
>>> shape=[10, 4],
>>> append_batch_size=False,
>>> dtype='float32')
>>> pbv = layers.data(
>>> name='prior_box_var',
>>> shape=[10, 4],
>>> append_batch_size=False,
>>> dtype='float32')
>>> loc = layers.data(name='target_box', shape=[10, 4], dtype='float32')
>>> scores = layers.data(name='scores', shape=[10, 21], dtype='float32')
>>> gt_box = layers.data(
>>> name='gt_box', shape=[4], lod_level=1, dtype='float32')
>>> gt_label = layers.data(
>>> name='gt_label', shape=[1], lod_level=1, dtype='float32')
>>> loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
Args:
location (Variable): The location predictions are a 3D Tensor with
shape [N, Np, 4], N is the batch size, Np is total number of
......@@ -426,34 +459,12 @@ def ssd_loss(location,
mining_type is 'hard_example'.
Returns:
The weighted sum of the localization loss and confidence loss, with \
shape [N * Np, 1]; N and Np are the same as they are in `location`.
Raises:
ValueError: If mining_type is 'hard_example', only the mining type \
`max_negative` is currently supported.
"""
helper = LayerHelper('ssd_loss', **locals())
......
......@@ -1624,6 +1624,7 @@ def batch_norm(input,
return helper.append_activation(batch_norm_out)
@templatedoc()
def layer_norm(input,
scale=True,
shift=True,
......@@ -1634,16 +1635,7 @@ def layer_norm(input,
act=None,
name=None):
"""
${comment}
The formula is as follows:
......@@ -1655,6 +1647,11 @@ def layer_norm(input,
h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
>>> import paddle.fluid as fluid
>>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
>>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
Args:
input(Variable): The input tensor variable.
scale(bool): Whether to learn the adaptive gain :math:`g` after
......@@ -1672,14 +1669,7 @@ def layer_norm(input,
act(str): Activation to be applied to the output of layer normalization.
Returns:
${y_comment}
"""
helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype()
......@@ -3184,29 +3174,19 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
return out
@templatedoc()
def row_conv(input, future_context_size, param_attr=None, act=None):
"""Row Conv Operator. This layer will apply lookahead convolution to
**input**. The input variable should be a 2D LoDTensor with shape [T, D].
Parameters with shape [future_context_size + 1, D] will be created. The math
equation of row convolution is as follows:
.. math::
Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
In the above equation:
"""
${comment}
* :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
* :math:`\\tau`: Future context size.
* :math:`X_{j}`: The j-th row of input variable with shape [1, D].
* :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
>>> import paddle.fluid as fluid
>>> x = fluid.layers.data(name='x', shape=[16],
>>> dtype='float32', lod_level=1)
>>> out = fluid.layers.row_conv(input=x, future_context_size=2)
Args:
input (${x_type}): ${x_comment}.
future_context_size (int): Future context size. Please note, the shape
of convolution kernel is [future_context_size + 1, D].
param_attr (ParamAttr): Attributes of parameters, including
......@@ -3214,14 +3194,7 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
act (str): Non-linear activation to be applied to output variable.
Returns:
${out_comment}.
"""
helper = LayerHelper('row_conv', **locals())
dtype = helper.input_dtype()
......
......@@ -49,7 +49,18 @@ def create_parameter(shape,
is_bias=False,
default_initializer=None):
"""
Create a parameter. The parameter is a learnable variable, which has a
gradient and can be optimized.
NOTE: this is a very low-level API. It is useful when you create an
operator by yourself, instead of using layers.
>>> import paddle.fluid as fluid
>>> W = fluid.layers.create_parameter(shape=[784, 200], dtype='float32')
>>> data = fluid.layers.data(name="img", shape=[64, 784],
>>> append_batch_size=False)
>>> hidden = fluid.layers.matmul(x=data, y=W)
Args:
shape(list[int]): shape of the parameter
dtype(string): element type of the parameter
......@@ -61,7 +72,7 @@ def create_parameter(shape,
default_initializer(Initializer): initializer for the parameter
Returns:
the created parameter
"""
helper = LayerHelper("create_parameter", **locals())
if attr is None:
......