From 6a1e31291408ad172110374c9555f6705e30b92b Mon Sep 17 00:00:00 2001
From: caoying03
Date: Wed, 20 Dec 2017 16:25:19 +0800
Subject: [PATCH] refine the doc.

---
 paddle/operators/mul_op.cc          | 33 +++++++++----
 python/paddle/v2/fluid/layers/nn.py | 73 ++++++++++++++++++-----------
 2 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index a4bf0711d..25944e3d1 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -73,25 +73,38 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of mul op");
-    AddInput("Y", "The second input of mul op");
-    AddOutput("Out", "The output of mul op");
+    AddInput("X", "The first input tensor of the mul op.");
+    AddInput("Y", "The second input tensor of the mul op.");
+    AddOutput("Out", "The output tensor of the mul op.");
     AddAttr<int>(
         "x_num_col_dims",
         "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
-            in that case, tensors will be reshaped to a matrix. The matrix's first
-            dimension(column length) will be the product of tensor's last
-            `num_col_dims` dimensions, and the matrix's second dimension(row length)
-            will be the product of tensor's first `rank - num_col_dims` dimensions.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `X` is a tensor with more than two
+              dimensions, `X` will first be flattened into a two-dimensional
+              matrix. The flattening rule is: the first `x_num_col_dims`
+              dimensions are flattened to form the first dimension of the
+              matrix (the height of the matrix), and the remaining
+              `rank(X) - x_num_col_dims` dimensions are flattened to form the
+              second dimension of the matrix (the width of the matrix). As a
+              result, the height of the flattened matrix equals the product of
+              the sizes of `X`'s first `x_num_col_dims` dimensions, and the
+              width equals the product of the sizes of `X`'s last
+              `rank(X) - x_num_col_dims` dimensions. For example, suppose `X`
+              is a 5-dimensional tensor with the shape [2, 3, 4, 5, 6] and
+              `x_num_col_dims` = 3. Then the flattened matrix has the shape
+              [2 x 3 x 4, 5 x 6] = [24, 30].
     )DOC")
         .SetDefault(1)
         .EqualGreaterThan(1);
     AddAttr<int>(
         "y_num_col_dims",
         "(int, default 1) "
-        R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
-            in that case, tensors will be reshaped to a matrix. Just like input `X`.
+        R"DOC(The mul_op can take tensors with more than two dimensions as its
+              inputs. If the input `Y` is a tensor with more than two
+              dimensions, `Y` will first be flattened into a two-dimensional
+              matrix. The attribute `y_num_col_dims` determines how `Y` is
+              flattened. See the comments on `x_num_col_dims` for more details.
     )DOC")
         .SetDefault(1)
         .EqualGreaterThan(1);
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 2c38c2322..71dab4e66 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -28,31 +28,52 @@ def fc(input,
     Fully Connected Layer.
 
     Args:
-        input: The input tensor to the function
-        size: The size of the layer
-        num_flatten_dims: Number of columns in input
-        param_attr: The parameters/weights to the FC Layer
-        param_initializer: Initializer used for the weight/parameter. If None, XavierInitializer() is used
-        bias_attr: The bias parameter for the FC layer
-        bias_initializer: Initializer used for the bias. If None, then ConstantInitializer() is used
-        act: Activation to be applied to the output of FC layer
-        name: Name/alias of the function
-        main_program: Name of the main program that calls this
-        startup_program: Name of the startup program
-
-    This function can take in multiple inputs and performs the Fully Connected
-    function (linear transformation) on top of each of them.
-    So for input x, the output will be : Wx + b. Where W is the parameter,
-    b the bias and x is the input.
-
-    The function also applies an activation (non-linearity) on top of the
-    output, if activation is passed in the input.
-
-    All the input variables of this function are passed in as local variables
-    to the LayerHelper constructor.
+        input: The input tensor(s) to the fully connected layer.
+        size: The number of output units in the fully connected layer.
+        num_flatten_dims: The fc layer can accept an input tensor with more
+                          than two dimensions. If this happens, the
+                          multidimensional tensor will first be flattened into
+                          a 2-dimensional matrix. The parameter
+                          `num_flatten_dims` determines how the input tensor
+                          is flattened: the first `num_flatten_dims` dimensions
+                          are flattened to form the first dimension of the
+                          final matrix (the height of the matrix), and the
+                          remaining `rank(X) - num_flatten_dims` dimensions are
+                          flattened to form the second dimension of the final
+                          matrix (the width of the matrix). For example,
+                          suppose `X` is a 5-dimensional tensor with a shape
+                          [2, 3, 4, 5, 6] and `num_flatten_dims` = 3. Then the
+                          flattened matrix will have a shape
+                          [2 x 3 x 4, 5 x 6] = [24, 30]. By default,
+                          `num_flatten_dims` is set to 1.
+        param_attr: The parameter attribute for the learnable
+                    parameters/weights of the fully connected layer.
+        param_initializer: The initializer used for the weight/parameter.
+                           If set to None, XavierInitializer() will be used.
+        bias_attr: The parameter attribute for the bias of this layer.
+                   If set to None, no bias will be added to the output units.
+        bias_initializer: The initializer used for the bias. If set to None,
+                          ConstantInitializer() will be used.
+        act: Activation to be applied to the output of the fully connected
+             layer.
+        name: Name/alias of the fully connected layer.
+
+    The fully connected layer can take multiple tensors as its inputs. It
+    creates a variable (one for each input tensor) called weights, which
+    represents a fully connected weight matrix from each input unit to each
+    output unit. The fully connected layer multiplies each input tensor with
+    its corresponding weight to produce an output tensor. If multiple input
+    tensors are given, the results of the multiplications will be summed up.
+    If bias_attr is not None, a bias variable will be created and added to the
+    output. Finally, if activation is not None, it will be applied to the
+    output as well.
+
+    This process can be formulated as follows:
+
+    .. math::
+
+        Y = \sigma\left(\sum_{i=0}^{N-1} X_i W_i + b\right)
+
+    where :math:`N` is the number of input tensors, :math:`X_i` is the i-th
+    input tensor, :math:`W_i` is the weight matrix created by this layer for
+    the i-th input, :math:`b` is the bias, and :math:`\sigma` is the
+    activation function specified by `act`.
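+
+    Examples:
+        A minimal usage sketch (the `fluid.layers.data` call, the shapes, and
+        the names below are only illustrative):
+
+        .. code-block:: python
+
+            # Assumes `import paddle.v2.fluid as fluid`.
+            # `data` has the shape [batch_size, 32, 32]; with the default
+            # num_flatten_dims=1 it is flattened to [batch_size, 32 * 32]
+            # before being multiplied by the weight matrix.
+            data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+            fc = fluid.layers.fc(input=data, size=1000, act="tanh")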
""" - helper = LayerHelper('fc', **locals()) + helper = LayerHelper("fc", **locals()) dtype = helper.input_dtype() @@ -72,8 +93,8 @@ def fc(input, "Y": w, }, outputs={"Out": tmp}, - attrs={'x_num_col_dims': num_flatten_dims, - 'y_num_col_dims': 1}) + attrs={"x_num_col_dims": num_flatten_dims, + "y_num_col_dims": 1}) mul_results.append(tmp) # sum @@ -100,8 +121,6 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer dtype: The type of data : float32, float_16, int etc - main_program: Name of the main program that calls this - startup_program: Name of the startup program This function can take in the input (which is a vector of IDs) and performs a lookup in the lookup_table using these IDs, to result into -- GitLab