Merge pull request #6792 from lcy-seso/refine_doc

Refine the doc for mul_op and fully connected layer.

Merge pull request #6792 from lcy-seso/refine_doc
Refine the doc for mul_op and fully connected layer.
298dc895 · Cao Ying · GitHub · 22fba722 · a74db488 · 298dc895
7 changed files
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -467,7 +467,7 @@ lambda_cost
    :noindex:
 square_error_cost
--------
+-----------------
 ..  autoclass:: paddle.v2.layer.square_error_cost
    :noindex:
@@ -533,7 +533,7 @@ Miscs
 =====
 dropout
--------------
+--------
 ..  autoclass:: paddle.v2.layer.dropout
    :noindex:

--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -19,17 +19,17 @@ dynamic_lstm
    :noindex:
 data
---------
+----
 ..  autofunction:: paddle.v2.fluid.layers.data
    :noindex:
 mean
---------
+----
 ..  autofunction:: paddle.v2.fluid.layers.mean
    :noindex:
 mul
---------
+---
 ..  autofunction:: paddle.v2.fluid.layers.mul
    :noindex:
@@ -45,13 +45,13 @@ elementwise_div
 dropout
---------
+-------
 ..  autofunction:: paddle.v2.fluid.layers.dropout
    :noindex:
 reshape
---------
+--------
 ..  autofunction:: paddle.v2.fluid.layers.reshape
    :noindex:
@@ -81,67 +81,67 @@ transpose
 sigmoid_cross_entropy_with_logits
---------
+---------------------------------
 ..  autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits
    :noindex:
 cast
---------
+----
 ..  autofunction:: paddle.v2.fluid.layers.cast
    :noindex:
 concat
---------
+-------
 ..  autofunction:: paddle.v2.fluid.layers.concat
    :noindex:
 sums
---------
+----
 ..  autofunction:: paddle.v2.fluid.layers.sums
    :noindex:
 linear_chain_crf
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.linear_chain_crf
    :noindex:
 assign
---------
+-------
 ..  autofunction:: paddle.v2.fluid.layers.embedding
    :noindex:
 split_lod_tensor
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.split_lod_tensor
    :noindex:
 merge_lod_tensor
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
    :noindex:
 cos_sim
---------
+--------
 ..  autofunction:: paddle.v2.fluid.layers.cos_sim
    :noindex:
 cross_entropy
---------
+-------------
 ..  autofunction:: paddle.v2.fluid.layers.cross_entropy
    :noindex:
 square_error_cost
---------
+-----------------
 ..  autofunction:: paddle.v2.fluid.layers.square_error_cost
    :noindex:
@@ -153,68 +153,68 @@ accuracy
 sequence_conv
---------
+-------------
 ..  autofunction:: paddle.v2.fluid.layers.sequence_conv
    :noindex:
 conv2d
---------
+------
 ..  autofunction:: paddle.v2.fluid.layers.conv2d
    :noindex:
 sequence_pool
---------
+-------------
 ..  autofunction:: paddle.v2.fluid.layers.sequence_pool
    :noindex:
 pool2d
---------
+------
 ..  autofunction:: paddle.v2.fluid.layers.pool2d
    :noindex:
 batch_norm
---------
+----------
 ..  autofunction:: paddle.v2.fluid.layers.batch_norm
    :noindex:
 beam_search_decode
---------
+------------------
 ..  autofunction:: paddle.v2.fluid.layers.beam_search_decode
    :noindex:
 lod_rank_table
---------
+--------------
 ..  autofunction:: paddle.v2.fluid.layers.lod_rank_table
    :noindex:
 max_sequence_len
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.max_sequence_len
    :noindex:
 topk
---------
+-----
 ..  autofunction:: paddle.v2.fluid.layers.topk
    :noindex:
 lod_tensor_to_array
---------
+-------------------
 ..  autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
    :noindex:
 array_to_lod_tensor
---------
+-------------------
 ..  autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
    :noindex:
@@ -222,26 +222,26 @@ array_to_lod_tensor
 fill_constant
---------
+-------------
 ..  autofunction:: paddle.v2.fluid.layers.fill_constant
    :noindex:
 fill_constant_batch_size_like
---------
+-----------------------------
 ..  autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
    :noindex:
 ones
---------
+----
 ..  autofunction:: paddle.v2.fluid.layers.ones
    :noindex:
 zeros
---------
+-----
 ..  autofunction:: paddle.v2.fluid.layers.zeros
    :noindex:
@@ -253,14 +253,14 @@ increment
 array_write
---------
+-----------
 ..  autofunction:: paddle.v2.fluid.layers.array_write
    :noindex:
 create_array
---------
+------------
 ..  autofunction:: paddle.v2.fluid.layers.create_array
    :noindex:
@@ -272,31 +272,31 @@ less_than
 array_read
---------
+----------
 ..  autofunction:: paddle.v2.fluid.layers.array_read
    :noindex:
 shrink_memory
---------
+--------------
 ..  autofunction:: paddle.v2.fluid.layers.shrink_memory
    :noindex:
 array_length
---------
+-------------
 ..  autofunction:: paddle.v2.fluid.layers.array_length
    :noindex:
 conv2d_transpose
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.conv2d_transpose
    :noindex:
 sequence_expand
---------
+---------------
 ..  autofunction:: paddle.v2.fluid.layers.sequence_expand
    :noindex:
@@ -308,13 +308,13 @@ lstm_unit
 sequence_softmax
---------
+----------------
 ..  autofunction:: paddle.v2.fluid.layers.sequence_softmax
    :noindex:
 reduce_sum
---------
+----------
 ..  autofunction:: paddle.v2.fluid.layers.reduce_sum
    :noindex:
--- a/doc/api/v2/fluid/nets.rst
+++ b/doc/api/v2/fluid/nets.rst
@@ -3,19 +3,19 @@ Nets
 ===========
 simple_img_conv_pool
-----------
+--------------------
 ..  autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
    :noindex:
 img_conv_group
-----------
+---------------
 ..  autofunction:: paddle.v2.fluid.nets.img_conv_group
    :noindex:
 sequence_conv_pool
-----------
+------------------
 ..  autofunction:: paddle.v2.fluid.nets.sequence_conv_pool
    :noindex:

--- a/doc/api/v2/fluid/optimizer.rst
+++ b/doc/api/v2/fluid/optimizer.rst
@@ -18,7 +18,7 @@ SGDOptimizer
 MomentumOptimizer
-----------
+-----------------
 ..  automodule:: paddle.v2.fluid.optimizer
    :members: MomentumOptimizer
    :noindex:
@@ -26,14 +26,14 @@ MomentumOptimizer
 AdagradOptimizer
-----------
+----------------
 ..  automodule:: paddle.v2.fluid.optimizer
    :members: AdagradOptimizer
    :noindex:
 AdamOptimizer
-----------
+-------------
 ..  automodule:: paddle.v2.fluid.optimizer
    :members: AdamOptimizer
    :noindex:
@@ -47,7 +47,7 @@ AdamaxOptimizer
 DecayedAdagradOptimizer
-----------
+-----------------------
 ..  automodule:: paddle.v2.fluid.optimizer
    :members: DecayedAdagradOptimizer
    :noindex:

--- a/doc/api/v2/fluid/regularizer.rst
+++ b/doc/api/v2/fluid/regularizer.rst
@@ -3,14 +3,14 @@ Regularizer
 ===========
 WeightDecayRegularizer
-----------
+----------------------
 ..  automodule:: paddle.v2.fluid.regularizer
    :members: WeightDecayRegularizer
    :noindex:
 L2DecayRegularizer
-----------
+------------------
 ..  automodule:: paddle.v2.fluid.regularizer
    :members: L2DecayRegularizer
    :noindex:
@@ -18,7 +18,7 @@ L2DecayRegularizer
 L1DecayRegularizer
-----------
+-------------------
 ..  automodule:: paddle.v2.fluid.regularizer
    :members: L1DecayRegularizer

--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -73,39 +73,50 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of mul op");
+    AddInput("X", "(Tensor), The first input tensor of mul op.");
-    AddInput("Y", "The second input of mul op");
+    AddInput("Y", "(Tensor), The second input tensor of mul op.");
-    AddOutput("Out", "The output of mul op");
+    AddOutput("Out", "(Tensor), The output tensor of mul op.");
    AddAttr<int>(
        "x_num_col_dims",
-        "(int, default 1) "
+        R"DOC((int, default 1), The mul_op can take tensors with more than two
-        R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
+              dimensions as its inputs. If the input $X$ is a tensor with more
-            in that case, tensors will be reshaped to a matrix. The matrix's first
+              than two dimensions, $X$ will be flattened into a two-dimensional
-            dimension(column length) will be the product of tensor's last
+              matrix first. The flattening rule is: the first `x_num_col_dims`
-            `num_col_dims` dimensions, and the matrix's second dimension(row length)
+              dimensions will be flattened to form the first dimension of the
-            will be the product of tensor's first `rank - num_col_dims` dimensions.
+              final matrix (the height of the matrix), and the rest
+              `rank(X) - x_num_col_dims`
+              dimensions are flattened to form the second dimension of the final
+              matrix (the width of the matrix). As a result, height of the
+              flattened matrix is equal to the product of $X$'s first
+              `x_num_col_dims` dimensions' sizes, and width of the flattened
+              matrix is equal to the product of $X$'s last
+              `rank(X) - x_num_col_dims` dimensions' sizes. For example,
+              suppose $X$ is a 5-dimensional tensor with the shape
+              [2, 3, 4, 5, 6], and `x_num_col_dims` = 3.
+              Thus, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] =
+              [24, 30].
        )DOC")
        .SetDefault(1)
        .EqualGreaterThan(1);
    AddAttr<int>(
        "y_num_col_dims",
-        "(int, default 1) "
+        R"DOC((int, default 1), The mul_op can take tensors with more than two
-        R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
+              dimensions as its inputs. If the input $Y$ is a tensor with more
-             in that case, tensors will be reshaped to a matrix. Just like input `X`.
+              than two dimensions, $Y$ will be flattened into a two-dimensional
+              matrix first. The attribute `y_num_col_dims` determines how $Y$ is
+              flattened. See comments of `x_num_col_dims` for more details.
        )DOC")
        .SetDefault(1)
        .EqualGreaterThan(1);
    AddComment(R"DOC(
 Mul Operator.
-This operator is used to perform matrix multiplication for input X and Y.
+This operator is used to perform matrix multiplication for input $X$ and $Y$.
 The equation is:
    $$Out = X * Y$$
-Both the input `X` and `Y` can carry the LoD (Level of Details) information,
+Both the input $X$ and $Y$ can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD information with input `X`.
+or not. But the output only shares the LoD information with input $X$.
 )DOC");
  }

--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -27,48 +27,81 @@ def fc(input,
    """
    **Fully Connected Layer**
-    This layer accepts multiple inputs and applies a linear transformation to each input.
+    The fully connected layer can take multiple tensors as its inputs. It
-    If activation type is provided, the corresponding activation function is applied to the
+    creates a variable (one for each input tensor) called weights for each input
-    output of the linear transformation. For each input :math:`X`, the equation is:
+    tensor, which represents a fully connected weight matrix from each input
+    unit to each output unit. The fully connected layer multiplies each input
+    tensor with its corresponding weight to produce an output Tensor. If
+    multiple input tensors are given, the results of multiple multiplications
+    will be summed up. If bias_attr is not None, a bias variable will be
+    created and added to the output. Finally, if activation is not None,
+    it will be applied to the output as well.
+    This process can be formulated as follows:
    .. math::
-        Out = Act(WX + b)
+        Out = Act\\left({\\sum_{i=0}^{N-1}X_iW_i + b}\\right)
    In the above equation:
-        * :math:`X`: Input value, a tensor with rank at least 2.
+    * :math:`N`: The number of input tensors.
-        * :math:`W`: Weight, a 2-D tensor with shape [M, N].
+    * :math:`X_i`: The i-th input tensor.
-        * :math:`b`: Bias, a 2-D tensor with shape [M, 1].
+    * :math:`W_i`: The weight matrix created by this layer for the i-th input.
-        * :math:`Act`: Activation function.
+    * :math:`b`: The bias parameter created by this layer (if needed).
-        * :math:`Out`: Output value, same shape with :math:`X`.
+    * :math:`Act`: The activation function.
+    * :math:`Out`: The output tensor.
-    All the input variables are passed in as local variables to the LayerHelper
-    constructor.
    Args:
-       input(Variable|list): Input tensors. Each tensor has a rank of atleast 2
+       input(Variable|list): The input tensor(s) to the fully connected layer.
-       size(int): Output size
+       size(int): The number of output units in the fully connected layer.
-       num_flatten_dims(int): Number of columns in input
+       num_flatten_dims(int): The fc layer can accept an input tensor with more
-       param_attr(ParamAttr|list): The parameters/weights to the FC Layer
+                              than two dimensions. If this happens, the
-       bias_attr(ParamAttr|list): Bias parameter for the FC layer
+                              multidimensional tensor will first be flattened
-       act(str): Activation type
+                              into a 2-dimensional matrix. The parameter
-       name(str): Name/alias of the function
+                              `num_flatten_dims` determines how the input tensor
+                              is flattened: the first `num_flatten_dims`
+                              dimensions will be flattened to form the first
+                              dimension of the final matrix (height of the
+                              matrix), and the rest `rank(X) - num_flatten_dims`
+                              dimensions are flattened to form the second
+                              dimension of the final matrix (width of the matrix).
+                              For example, suppose `X` is a 5-dimensional tensor
+                              with a shape [2, 3, 4, 5, 6], and
+                              `num_flatten_dims` = 3. Then, the flattened matrix
+                              will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
+                              By default, `num_flatten_dims` is set to 1.
+       param_attr(ParamAttr|list): The parameter attribute for learnable
+                                   parameters/weights of the fully connected
+                                   layer.
+       param_initializer(ParamAttr|list): The initializer used for the
+                                          weight/parameter. If set None,
+                                          XavierInitializer() will be used.
+       bias_attr(ParamAttr|list): The parameter attribute for the bias parameter
+                                  for this layer. If set None, no bias will be
+                                  added to the output units.
+       bias_initializer(ParamAttr|list): The initializer used for the bias.
+                                        If set None, then ConstantInitializer()
+                                        will be used.
+       act(str): Activation to be applied to the output of the fully connected
+                 layer.
+       name(str): Name/alias of the fully connected layer.
    Returns:
-        Variable: The tensor variable storing the transformation and \
+        Variable: The output tensor variable.
-                  non-linearity activation result.
    Raises:
-        ValueError: If rank of input tensor is less than 2.
+        ValueError: If rank of the input tensor is less than 2.
    Examples:
        .. code-block:: python
-          data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32')
+          data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
          fc = fluid.layers.fc(input=data, size=1000, act="tanh")
    """
-    helper = LayerHelper('fc', **locals())
+    helper = LayerHelper("fc", **locals())
    dtype = helper.input_dtype()
@@ -88,8 +121,8 @@ def fc(input,
                "Y": w,
            },
            outputs={"Out": tmp},
-            attrs={'x_num_col_dims': num_flatten_dims,
+            attrs={"x_num_col_dims": num_flatten_dims,
-                   'y_num_col_dims': 1})
+                   "y_num_col_dims": 1})
        mul_results.append(tmp)
    # sum