From 6a1e31291408ad172110374c9555f6705e30b92b Mon Sep 17 00:00:00 2001
From: caoying03
Date: Wed, 20 Dec 2017 16:25:19 +0800
Subject: [PATCH] refine the doc.

---
 paddle/operators/mul_op.cc          | 33 +++++++++----
 python/paddle/v2/fluid/layers/nn.py | 73 ++++++++++++++++++-----------
 2 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index a4bf0711d..25944e3d1 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -73,25 +73,38 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of mul op");
-    AddInput("Y", "The second input of mul op");
-    AddOutput("Out", "The output of mul op");
+    AddInput("X", "The first input tensor of the mul op.");
+    AddInput("Y", "The second input tensor of the mul op.");
+    AddOutput("Out", "The output tensor of the mul op.");
     AddAttr<int>(
         "x_num_col_dims",
         "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
-            in that case, tensors will be reshaped to a matrix. The matrix's first
-            dimension(column length) will be the product of tensor's last
-            `num_col_dims` dimensions, and the matrix's second dimension(row length)
-            will be the product of tensor's first `rank - num_col_dims` dimensions.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `X` is a tensor with more than two
+              dimensions, `X` will first be flattened into a two-dimensional
+              matrix. The flattening rule is: the first `x_num_col_dims`
+              dimensions are flattened to form the first dimension of the
+              matrix (the height of the matrix), and the remaining
+              `rank(X) - x_num_col_dims` dimensions are flattened to form the
+              second dimension of the matrix (the width of the matrix). As a
+              result, the height of the flattened matrix equals the product of
+              the sizes of `X`'s first `x_num_col_dims` dimensions, and the
+              width equals the product of the sizes of `X`'s last
+              `rank(X) - x_num_col_dims` dimensions. For example, suppose `X`
+              is a 5-dimensional tensor with the shape [2, 3, 4, 5, 6] and
+              `x_num_col_dims` = 3. Then the flattened matrix has the shape
+              [2 x 3 x 4, 5 x 6] = [24, 30].
     )DOC")
         .SetDefault(1)
         .EqualGreaterThan(1);
     AddAttr<int>(
         "y_num_col_dims",
         "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
-            in that case, tensors will be reshaped to a matrix. Just like input `X`.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `Y` is a tensor with more than two
+              dimensions, `Y` will first be flattened into a two-dimensional
+              matrix. The attribute `y_num_col_dims` determines how `Y` is
+              flattened. See the comments on `x_num_col_dims` for more details.
     )DOC")
         .SetDefault(1)
         .EqualGreaterThan(1);
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 2c38c2322..71dab4e66 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -28,31 +28,52 @@ def fc(input,
     Fully Connected Layer.
 
     Args:
-        input: The input tensor to the function
-        size: The size of the layer
-        num_flatten_dims: Number of columns in input
-        param_attr: The parameters/weights to the FC Layer
-        param_initializer: Initializer used for the weight/parameter. If None, XavierInitializer() is used
-        bias_attr: The bias parameter for the FC layer
-        bias_initializer: Initializer used for the bias. If None, then ConstantInitializer() is used
-        act: Activation to be applied to the output of FC layer
-        name: Name/alias of the function
-        main_program: Name of the main program that calls this
-        startup_program: Name of the startup program
-
-    This function can take in multiple inputs and performs the Fully Connected
-    function (linear transformation) on top of each of them.
-    So for input x, the output will be : Wx + b. Where W is the parameter,
-    b the bias and x is the input.
-
-    The function also applies an activation (non-linearity) on top of the
-    output, if activation is passed in the input.
-
-    All the input variables of this function are passed in as local variables
-    to the LayerHelper constructor.
+        input: The input tensor(s) to the fully connected layer.
+        size: The number of output units in the fully connected layer.
+        num_flatten_dims: The fc layer can accept an input tensor with more
+                          than two dimensions. If this happens, the
+                          multidimensional tensor will first be flattened into
+                          a 2-dimensional matrix. The parameter
+                          `num_flatten_dims` determines how the input tensor
+                          is flattened: the first `num_flatten_dims` dimensions
+                          are flattened to form the first dimension of the
+                          final matrix (the height of the matrix), and the
+                          remaining `rank(X) - num_flatten_dims` dimensions are
+                          flattened to form the second dimension of the final
+                          matrix (the width of the matrix). For example,
+                          suppose `X` is a 5-dimensional tensor with a shape
+                          [2, 3, 4, 5, 6] and `num_flatten_dims` = 3. Then the
+                          flattened matrix will have a shape
+                          [2 x 3 x 4, 5 x 6] = [24, 30]. By default,
+                          `num_flatten_dims` is set to 1.
+        param_attr: The parameter attribute for the learnable
+                    parameters/weights of the fully connected layer.
+        param_initializer: The initializer used for the weight/parameter.
+                           If set to None, XavierInitializer() will be used.
+        bias_attr: The parameter attribute for the bias of this layer.
+                   If set to None, no bias will be added to the output units.
+        bias_initializer: The initializer used for the bias. If set to None,
+                          ConstantInitializer() will be used.
+        act: Activation to be applied to the output of the fully connected
+             layer.
+        name: Name/alias of the fully connected layer.
+
+    The fully connected layer can take multiple tensors as its inputs. It
+    creates a variable (one for each input tensor) called weights, which
+    represents a fully connected weight matrix from each input unit to each
+    output unit. The fully connected layer multiplies each input tensor with
+    its corresponding weight to produce an output tensor. If multiple input
+    tensors are given, the results of the multiplications will be summed up.
+    If bias_attr is not None, a bias variable will be created and added to the
+    output. Finally, if activation is not None, it will be applied to the
+    output as well.
+
+    This process can be formulated as follows:
+
+    .. math::
+
+        Y = \sigma\left(\sum_{i=0}^{N-1} X_i W_i + b\right)
+
+    where :math:`N` is the number of input tensors, :math:`X_i` is the i-th
+    input tensor, :math:`W_i` is the weight matrix created by this layer for
+    the i-th input, :math:`b` is the bias, and :math:`\sigma` is the
+    activation function specified by `act`.
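+
+    Examples:
+        A minimal usage sketch (the `fluid.layers.data` call, the shapes, and
+        the names below are only illustrative):
+
+        .. code-block:: python
+
+            # Assumes `import paddle.v2.fluid as fluid`.
+            # `data` has the shape [batch_size, 32, 32]; with the default
+            # num_flatten_dims=1 it is flattened to [batch_size, 32 * 32]
+            # before being multiplied by the weight matrix.
+            data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+            fc = fluid.layers.fc(input=data, size=1000, act="tanh")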
""" - helper = LayerHelper('fc', **locals()) + helper = LayerHelper("fc", **locals()) dtype = helper.input_dtype() @@ -72,8 +93,8 @@ def fc(input, "Y": w, }, outputs={"Out": tmp}, - attrs={'x_num_col_dims': num_flatten_dims, - 'y_num_col_dims': 1}) + attrs={"x_num_col_dims": num_flatten_dims, + "y_num_col_dims": 1}) mul_results.append(tmp) # sum @@ -100,8 +121,6 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer dtype: The type of data : float32, float_16, int etc - main_program: Name of the main program that calls this - startup_program: Name of the startup program This function can take in the input (which is a vector of IDs) and performs a lookup in the lookup_table using these IDs, to result into -- GitLab