diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 96e4c0e04cc30db6d0b86376434d5ea02694ae21..7a567a83f9e8750230e6edc1970630ee278ba2f4 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -276,13 +276,12 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 HardShrink Activation Operator.
 
-$$
-out = \begin{cases}
-    x, \text{if } x > \lambda \\
-    x, \text{if } x < -\lambda \\
-    0, \text{otherwise}
-  \end{cases}
-$$
+.. math::
+  out = \begin{cases}
+    x, \text{if } x > \lambda \\
+    x, \text{if } x < -\lambda \\
+    0, \text{otherwise}
+  \end{cases}
 
 )DOC");
   }
diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc
index ab097d31e9ab5eafa788539170e7e405df697625..14ce1da2e97186a50ed8bd52223a500c4c57b328 100644
--- a/paddle/fluid/operators/layer_norm_op.cc
+++ b/paddle/fluid/operators/layer_norm_op.cc
@@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel {
 class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X", "(LoDTensor) The input tensor.");
+    AddInput("X", "The input tensor.");
     AddInput("Scale",
-             "(Tensor, optional) Scale is a 1-dimensional tensor of size "
+             "(optional) Scale is a 1-dimensional tensor of size "
              "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
             "It is applied to the output.")
         .AsDispensable();
     AddInput("Bias",
-             "(Tensor, optional) Bias is a 1-dimensional tensor of size "
+             "(optional) Bias is a 1-dimensional tensor of size "
              "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])."
             "It is applied to the output.")
         .AsDispensable();
-    AddOutput("Y", "(LoDTensor) Result after normalization.");
-    AddOutput("Mean", "(Tensor) Mean of the current mini batch.")
-        .AsIntermediate();
-    AddOutput("Variance", "(Tensor) Variance of the current mini batch.")
+    AddOutput("Y", "Result after normalization.");
+    AddOutput("Mean", "Mean of the current mini batch.").AsIntermediate();
+    AddOutput("Variance", "Variance of the current mini batch.")
         .AsIntermediate();
 
     AddAttr<float>("epsilon",
-                   "(float, default 1e-5) Constant for "
-                   "numerical stability")
+                   "Constant for numerical stability [default 1e-5].")
         .SetDefault(1e-5)
         .AddCustomChecker([](const float &epsilon) {
           PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f,
                          "'epsilon' should be between 0.0 and 0.001.");
         });
     AddAttr<int>("begin_norm_axis",
-                 "(int default:1), the "
-                 "axis of `begin_norm_axis ... Rank(X) - 1` will be "
+                 "the axis of `begin_norm_axis ... Rank(X) - 1` will be "
                  "normalized. `begin_norm_axis` splits the tensor(`X`) to a "
-                 "matrix [N,H].")
+                 "matrix [N,H]. [default 1].")
         .SetDefault(1)
         .AddCustomChecker([](const int &begin_norm_axis) {
           PADDLE_ENFORCE_GT(begin_norm_axis, 0,
@@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker {
         });
 
     AddComment(R"DOC(
-Layer Normalization.
-Layer Norm has been implemented as discussed in the paper:
-https://arxiv.org/abs/1607.06450
-...
+Assume feature vectors exist on dimensions
+:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
+along these dimensions for each feature vector :math:`a` with size
+:math:`H`, then normalize each feature vector using the corresponding
+statistics. After that, apply learnable gain and bias on the normalized
+tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
+
+Refer to `Layer Normalization <https://arxiv.org/abs/1607.06450>`_
 )DOC");
   }
 };
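To ground the operator comment above, here is a minimal NumPy sketch of the
statistics and normalization it describes, for the matrix view [N, H] that
`begin_norm_axis` produces (array names and sizes are illustrative; this is
not the kernel implementation):

.. code-block:: python

    import numpy as np

    N, H, epsilon = 2, 8, 1e-5
    x = np.random.rand(N, H).astype('float32')  # X viewed as a matrix [N, H]
    scale = np.ones(H, dtype='float32')         # learnable gain g
    bias = np.zeros(H, dtype='float32')         # learnable bias b

    mean = x.mean(axis=1, keepdims=True)        # per-feature-vector mean
    var = x.var(axis=1, keepdims=True)          # per-feature-vector variance
    y = scale * (x - mean) / np.sqrt(var + epsilon) + bias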
diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc
index 20f140f962c3aac364a1239a663d5f340bbeb6b2..f4b540f1cbb1d528ba8dd32ebcf15eb839637816 100644
--- a/paddle/fluid/operators/row_conv_op.cc
+++ b/paddle/fluid/operators/row_conv_op.cc
@@ -78,18 +78,18 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor), the input(X) is a LodTensor, which supports "
+             "the input(X) is a LoDTensor, which supports "
              "variable time-length input sequences. The underlying tensor "
              "in this LoDTensor is a matrix with shape (T x N), where T "
              "is the total time steps in this mini-batch and N is the input "
              "data dimension.");
     AddInput("Filter",
-             "(Tensor), the input(Filter) is a learnable parameter. It "
+             "the input(Filter) is a learnable parameter. It "
              "is a 2-D tensor with shape (future_context x N), where, "
              "future_context is the future context length and N is the data "
              "dimension.");
     AddOutput("Out",
-              "(LoDTensor), the output(Out) is a LodTensor, which supports "
+              "the output(Out) is a LoDTensor, which supports "
               "variable time-length input sequences. The underlying tensor "
               "in this LodTensor is a matrix with shape T x N, i.e., the "
               "same shape as X.");
@@ -117,6 +117,20 @@
 $$
 out_{i, :} = \sum_{j=i}^{i + context} in_{j,:} \dot W_{i-j, :}
 $$
 
+In the above equation:
+
+* $out_{i, :}$: The i-th row of the output variable with shape [1, N].
+
+* $context$: Future context size.
+
+* $in_{j, :}$: The j-th row of the input variable with shape [1, N].
+
+* $W_{i-j, :}$: The (i-j)-th row of the parameters with shape [1, N].
+
+For more details about row_conv, please refer to
+the design document
+https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 .
+
 )DOC");
   }
 };
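The lookahead convolution above can also be stated concretely. Below is a
minimal NumPy sketch of the computation (array names and sizes are
illustrative, and the filter row is selected by the lookahead offset j - i,
following the [future_context_size + 1, D] parameter shape documented in the
Python `row_conv` docstring):

.. code-block:: python

    import numpy as np

    T, N, future_context = 6, 4, 2
    x = np.random.rand(T, N).astype('float32')                   # input (T x N)
    w = np.random.rand(future_context + 1, N).astype('float32')  # filter

    out = np.zeros_like(x)
    for i in range(T):
        # out_{i,:} = sum over j = i .. i + context of in_{j,:} * filter row,
        # where the filter row is indexed by the lookahead offset j - i.
        for j in range(i, min(i + future_context + 1, T)):
            out[i, :] += x[j, :] * w[j - i, :]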
diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc
index 137ea91caedabc3167146d91b063dbe9e2e2b931..65525526c9a18b7367028c4348636f1871810030 100644
--- a/paddle/fluid/operators/uniform_random_op.cc
+++ b/paddle/fluid/operators/uniform_random_op.cc
@@ -86,32 +86,26 @@ class UniformRandomOp : public framework::OperatorWithKernel {
 class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddOutput("Out", "(Tensor) The output tensor of uniform random op");
+    AddOutput("Out", "The output tensor of the uniform random op.");
     AddComment(R"DOC(
 Uniform random operator.
 
 This operator initializes a tensor with random values sampled from a
-uniform distribution.
+uniform distribution. The random result is in the range [min, max].
 
 )DOC");
-    AddAttr<std::vector<int>>("shape",
-                              "(vector<int>) The shape of the output tensor");
+    AddAttr<std::vector<int>>("shape", "The shape of the output tensor.");
-    AddAttr<float>("min",
-                   "(float, default -1.0) "
-                   "Minimum value of uniform random")
+    AddAttr<float>("min", "Minimum value of uniform random. [default -1.0].")
        .SetDefault(-1.0f);
-    AddAttr<float>("max",
-                   "(float, default 1.0) "
-                   "Maximun value of uniform random")
+    AddAttr<float>("max", "Maximum value of uniform random. [default 1.0].")
        .SetDefault(1.0f);
     AddAttr<int>("seed",
-                 "(int, default 0) "
                  "Random seed used for generating samples. "
                  "0 means use a seed generated by the system."
                  "Note that if seed is not 0, this operator will always "
-                 "generate the same random numbers every time.")
+                 "generate the same random numbers every time. [default 0].")
        .SetDefault(0);
-    AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type")
+    AddAttr<int>("dtype", "Output tensor data type. [default 5(FP32)].")
        .SetDefault(framework::proto::VarType::FP32);
  }
};
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index 3a83db12fd13651578deeac6b562bac2f1e4e4b6..1e8dfbe52181bb6bada178f656bdeddb5400ebee 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -373,22 +373,55 @@ def ssd_loss(location,
     confidence loss (or classification loss) by performing the following steps:
 
     1. Find matched boundding box by bipartite matching algorithm.
+
+      1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
+
+      1.2 Compute matched bounding box by bipartite matching algorithm.
+
     2. Compute confidence for mining hard examples
+
+      2.1. Get the target label based on matched indices.
+
+      2.2. Compute confidence loss.
+
     3. Apply hard example mining to get the negative example indices and update
        the matched indices.
+
     4. Assign classification and regression targets
+
+      4.1. Encode the bbox according to the prior boxes.
+
+      4.2. Assign regression targets.
+
+      4.3. Assign classification targets.
+
     5. Compute the overall objective loss.
+
+      5.1 Compute confidence loss.
+
+      5.2 Compute localization loss.
+
+      5.3 Compute the overall weighted loss.
+
+    >>> import paddle.fluid.layers as layers
+    >>> pb = layers.data(
+    >>>     name='prior_box',
+    >>>     shape=[10, 4],
+    >>>     append_batch_size=False,
+    >>>     dtype='float32')
+    >>> pbv = layers.data(
+    >>>     name='prior_box_var',
+    >>>     shape=[10, 4],
+    >>>     append_batch_size=False,
+    >>>     dtype='float32')
+    >>> loc = layers.data(name='target_box', shape=[10, 4], dtype='float32')
+    >>> scores = layers.data(name='scores', shape=[10, 21], dtype='float32')
+    >>> gt_box = layers.data(
+    >>>     name='gt_box', shape=[4], lod_level=1, dtype='float32')
+    >>> gt_label = layers.data(
+    >>>     name='gt_label', shape=[1], lod_level=1, dtype='float32')
+    >>> loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
 
     Args:
         location (Variable): The location predictions are a 3D Tensor with
             shape [N, Np, 4], N is the batch size, Np is total number of
@@ -426,34 +459,12 @@ def ssd_loss(location,
             mining_type is 'hard_example'.
 
     Returns:
-        Variable: The weighted sum of the localization loss and confidence loss,
-            with shape [N * Np, 1], N and Np are the same as they are
-            in `location`.
+        The weighted sum of the localization loss and confidence loss, with \
+        shape [N * Np, 1], N and Np are the same as they are in `location`.
 
     Raises:
-        ValueError: If mining_type is 'hard_example', now only support
-            mining type of `max_negative`.
-
-    Examples:
-        .. code-block:: python
-
-            pb = layers.data(
-                name='prior_box',
-                shape=[10, 4],
-                append_batch_size=False,
-                dtype='float32')
-            pbv = layers.data(
-                name='prior_box_var',
-                shape=[10, 4],
-                append_batch_size=False,
-                dtype='float32')
-            loc = layers.data(name='target_box', shape=[10, 4], dtype='float32')
-            scores = layers.data(name='scores', shape=[10, 21], dtype='float32')
-            gt_box = layers.data(
-                name='gt_box', shape=[4], lod_level=1, dtype='float32')
-            gt_label = layers.data(
-                name='gt_label', shape=[1], lod_level=1, dtype='float32')
-            loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
+        ValueError: If mining_type is 'hard_example', currently only the \
+            mining type of `max_negative` is supported.
     """
     helper = LayerHelper('ssd_loss', **locals())
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index b9ea74fc81e0eb9b52e9cd1e9af8cba005a10f21..ba13b344a132a82fbcfb0d0b7ac7f18a3974f80a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1624,6 +1624,7 @@ def batch_norm(input,
     return helper.append_activation(batch_norm_out)
 
 
+@templatedoc()
 def layer_norm(input,
                scale=True,
                shift=True,
@@ -1634,20 +1635,11 @@ def layer_norm(input,
                act=None,
                name=None):
     """
-    **Layer Normalization**
-
-    Assume feature vectors exist on dimensions
-    :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
-    along these dimensions for each feature vector :math:`a` with size
-    :math:`H`, then normalize each feature vector using the corresponding
-    statistics. After that, apply learnable gain and bias on the normalized
-    tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
-
-    Refer to `Layer Normalization <https://arxiv.org/abs/1607.06450>`_
+    ${comment}
 
     The formula is as follows:
 
-    .. math::
+    .. math::
 
         \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
 
@@ -1655,6 +1647,11 @@ def layer_norm(input,
 
         h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
 
+    >>> import paddle.fluid as fluid
+    >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
+    >>>                          dtype='float32')
+    >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
+
     Args:
         input(Variable): The input tensor variable.
         scale(bool): Whether to learn the adaptive gain :math:`g` after
@@ -1672,14 +1669,7 @@ def layer_norm(input,
         act(str): Activation to be applied to the output of layer normalizaiton.
 
     Returns:
-        Variable: A tensor variable with the same shape as the input.
-
-    Examples:
-        .. code-block:: python
-
-            data = fluid.layers.data(
-                name='data', shape=[3, 32, 32], dtype='float32')
-            x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
+        ${y_comment}
     """
     helper = LayerHelper('layer_norm', **locals())
     dtype = helper.input_dtype()
@@ -3184,29 +3174,19 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
     return out
 
 
+@templatedoc()
 def row_conv(input, future_context_size, param_attr=None, act=None):
-    """Row Conv Operator. This layer will apply lookahead convolution to
-    **input**. The input variable should be a 2D LoDTensor with shape [T, D].
-    Parameters with shape [future_context_size + 1, D] will be created. The math
-    equation of row convolution is as follows:
-
-    .. math::
-        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
-
-    In the above equation:
+    """
+    ${comment}
 
-    * :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
-    * :math:`\\tau`: Future context size.
-    * :math:`X_{j}`: The j-th row of input variable with shape [1, D].
-    * :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
+    >>> import paddle.fluid as fluid
+    >>> x = fluid.layers.data(name='x', shape=[16],
+    >>>                       dtype='float32', lod_level=1)
+    >>> out = fluid.layers.row_conv(input=x, future_context_size=2)
 
-    More details about row_conv please refer to the paper \
-    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
-    the design document \
-    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
 
     Args:
-        input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
+        input (${x_type}): ${x_comment}.
         future_context_size (int): Future context size. Please note, the
             shape of convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of parameters, including
@@ -3214,14 +3194,7 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
         act (str): Non-linear activation to be applied to output variable.
 
     Returns:
-        Variable: The output tensor with same shape as input tensor.
-
-    Examples:
-        .. code-block:: python
-
-            x = fluid.layers.data(name='x', shape=[16],
-                                  dtype='float32', lod_level=1)
-            out = fluid.layers.row_conv(input=x, future_context_size=2)
+        ${out_comment}.
     """
     helper = LayerHelper('row_conv', **locals())
     dtype = helper.input_dtype()
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 66db6fe13fb88fca93982c5a657418799556bc32..dbffcae86dc4fd7d94b30dac5cba7a554ad2a729 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -49,7 +49,18 @@ def create_parameter(shape,
                      is_bias=False,
                      default_initializer=None):
     """
-    Create a parameter
+    Create a parameter. The parameter is a learnable variable, which can have
+    gradient, and can be optimized.
+
+    NOTE: this is a very low-level API. It is useful when you create an
+    operator by yourself, instead of using layers.
+
+    >>> import paddle.fluid as fluid
+    >>> W = fluid.layers.create_parameter(shape=[784, 200], dtype='float32')
+    >>> data = fluid.layers.data(name="img", shape=[64, 784],
+    >>>                          append_batch_size=False)
+    >>> hidden = fluid.layers.matmul(x=data, y=W)
+
     Args:
         shape(list[int]): shape of the parameter
         dtype(string): element type of the parameter
@@ -61,7 +72,7 @@ def create_parameter(shape,
         default_initializer(Initializer): initializer for the parameter
 
     Returns:
-        Parameter: the created parameter
+        The created parameter.
     """
     helper = LayerHelper("create_parameter", **locals())
     if attr is None:
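As a usage note on the docstring above, the `default_initializer` argument can
be passed explicitly when the default initialization is not what you want. A
small sketch, assuming the `fluid.initializer.Constant` initializer of the
Fluid API of this era:

.. code-block:: python

    import paddle.fluid as fluid

    # Illustrative: start the parameter at zero instead of the default init.
    w = fluid.layers.create_parameter(
        shape=[784, 200], dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))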