refine the doc.

6a1e3129 · caoying03 · 2d5ec16b · 6a1e3129 · 6a1e3129
隐藏空白更改
内联并排

Showing with 69 addition and 37 deletion

paddle/operators/mul_op.cc paddle/operators/mul_op.cc +23 -10

python/paddle/v2/fluid/layers/nn.py python/paddle/v2/fluid/layers/nn.py +46 -27

未找到文件。
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -73,25 +73,38 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of mul op");
-    AddInput("Y", "The second input of mul op");
-    AddOutput("Out", "The output of mul op");
+    AddInput("X", "The first input tensor of the mul op.");
+    AddInput("Y", "The second input tensor of the mul op.");
+    AddOutput("Out", "The output tensor of the mul op.");
    AddAttr<int>(
        "x_num_col_dims",
        "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
-            in that case, tensors will be reshaped to a matrix. The matrix's first
-            dimension(column length) will be the product of tensor's last
-            `num_col_dims` dimensions, and the matrix's second dimension(row length)
-            will be the product of tensor's first `rank - num_col_dims` dimensions.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `X` is a tensor with more than two
+              dimensions, `X` will be flattened into a two-dimensional matrix
+              first. The rule is: the first `x_num_col_dims` dimensions are
+              flattened to form the first dimension of the matrix (height of the
+              matrix), and the rest `rank(X) - x_num_col_dims` dimensions are
+             flattened to form the second dimension of the matrix (width of the
+             matrix). As a result, height of the flattened matrix is equal to
+             the product of `X`'s first `x_num_col_dims` dimensions' sizes,
+             and width of the flattened matrix is equal to the product of `X`'s
+             last `rank(X) - x_num_col_dims` dimensions' sizes.
+             For example, suppose `X` is a 5-dimensional tensor with the shape
+             [2, 3, 4, 5, 6], and `x_num_col_dims` = 3. Then, the flattened
+             matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
        )DOC")
        .SetDefault(1)
        .EqualGreaterThan(1);
    AddAttr<int>(
        "y_num_col_dims",
        "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
-             in that case, tensors will be reshaped to a matrix. Just like input `X`.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `Y` is a tensor with more than two
+              dimensions, `Y` will be flattened into a two-dimensional matrix
+              first. The attribute `y_num_col_dims` is used to flatten `Y` into
+              a two-dimensional matrix. See the comments of `x_num_col_dims` for
+              more details.
        )DOC")
        .SetDefault(1)
        .EqualGreaterThan(1);

--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -28,31 +28,52 @@ def fc(input,
    Fully Connected Layer.

    Args:
-       input: The input tensor to the function
-       size: The size of the layer
-       num_flatten_dims: Number of columns in input
-       param_attr: The parameters/weights to the FC Layer
-       param_initializer: Initializer used for the weight/parameter. If None, XavierInitializer() is used
-       bias_attr: The bias parameter for the FC layer
-       bias_initializer: Initializer used for the bias. If None, then ConstantInitializer() is used
-       act: Activation to be applied to the output of FC layer
-       name: Name/alias of the function
-       main_program: Name of the main program that calls this
-       startup_program: Name of the startup program
-
-    This function can take in multiple inputs and performs the Fully Connected
-    function (linear transformation) on top of each of them.
-    So for input x, the output will be : Wx + b. Where W is the parameter,
-    b the bias and x is the input.
-
-    The function also applies an activation (non-linearity) on top of the
-    output, if activation is passed in the input.
-
-    All the input variables of this function are passed in as local variables
-    to the LayerHelper constructor.
+       input: The input tensor(s) to the fully connected layer.
+       size: The number of output units in the fully connected layer.
+       num_flatten_dims: The fc layer can accept an input tensor with more than
+                         two dimensions. If this happens, the multidimensional
+                         tensor will first be flattened into a 2-dimensional
+                         matrix. The parameter `num_flatten_dims` determines
+                         how the input tensor is flattened: the first
+                         `num_flatten_dims` dimensions are flattened to form
+                         the first dimension of the final matrix (height of the
+                         matrix), and the rest `rank(input) - num_flatten_dims`
+                         dimensions are flattened to form the second dimension
+                         of the final matrix (width of the matrix). For example,
+                         suppose `input` is a 5-dimensional tensor with a shape
+                         [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. Then, the
+                         flattened matrix will have a shape [2 x 3 x 4, 5 x 6]
+                         = [24, 30]. By default, `num_flatten_dims` is 1.
+       param_attr: The parameter attribute for learnable parameters/weights of
+                   the fully connected Layer.
+       param_initializer: The initializer used for the weight/parameter.
+                          If set None, XavierInitializer() will be used.
+       bias_attr: The parameter attribute for the bias parameter for this layer.
+                  If set None, no bias will be added to the output units.
+       bias_initializer: The initializer used for the bias. If set None,
+                         then ConstantInitializer() will be used.
+       act: Activation to be applied to the output of the fully connected layer.
+       name: Name/alias of the fully connected layer.
+
+    The fully connected layer can take multiple tensors as inputs. It creates a
+    variable (one for each input tensor) called weights which represents a
+    fully connected weight matrix from each input unit to each output unit.
+    The fully connected layer multiplies each input tensor with its
+    corresponding weight to produce an output Tensor. If multiple input tensors
+    are given, the results of multiple multiplications will be summed up. If
+    bias_attr is not None, a bias variable will be created and added to the
+    output. Finally, if `act` is not None, it is applied to the output as well.
+
+    This process can be formulated as follows:
+
+    .. math::
+        Y = \sigma({\sum_{i=0}^{N-1}W_iX_i + b})
+
+    where, :math:`N` is the number of inputs, :math:`X_i` is the input tensor,
+    :math:`W` is the weights created by this layer, and :math:`b` is the bias.

    """
-    helper = LayerHelper('fc', **locals())
+    helper = LayerHelper("fc", **locals())

    dtype = helper.input_dtype()

@@ -72,8 +93,8 @@ def fc(input,
                "Y": w,
            },
            outputs={"Out": tmp},
-            attrs={'x_num_col_dims': num_flatten_dims,
-                   'y_num_col_dims': 1})
+            attrs={"x_num_col_dims": num_flatten_dims,
+                   "y_num_col_dims": 1})
        mul_results.append(tmp)

    # sum
@@ -100,8 +121,6 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
       is_sparse: A flag that decleares whether the input is sparse
       param_attr: Parameters for this layer
       dtype: The type of data : float32, float_16, int etc
-       main_program: Name of the main program that calls this
-       startup_program: Name of the startup program

    This function can take in the input (which is a vector of IDs) and
    performs a lookup in the lookup_table using these IDs, to result into