diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc
index 2a2a1e9cfd680ff983f54e4c12c34fbb5af69ca0..eaafb9ad54b371837cbc0f3268f7f2bf169e83e8 100644
--- a/paddle/operators/accuracy_op.cc
+++ b/paddle/operators/accuracy_op.cc
@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
     auto inference_dim = ctx->GetInputDim("Out");
     auto label_dim = ctx->GetInputDim("Label");
-    // Assume indices has same shape with infernece, because
+    // Assume indices has same shape as inference, because
     // it's the output of topk.
 
     PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     // TODO(typhoonzero): support both inference value and indices.
-    AddInput("Out", "topk (inferences) the network output");
-    AddInput("Indices", "topk (indices) the network output");
+    AddInput("Out", "The network output of topk (inferences)");
+    AddInput("Indices", "The network output of topk (indices)");
     AddInput("Label", "Label of the training data");
     // TODO(typhoonzero): AddInput("Weight", ...
     AddOutput("Accuracy", "The accuracy of current batch");
 
     AddComment(R"DOC(
-Accuracy. It will print accuracy rate for classification.
-The accuracy is:
-.. math::
-accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
+Accuracy Operator.
+
+It will print the accuracy rate for classification.
+The accuracy is calculated as follows:
+
+$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
+
+Both the input Out and Label can carry the LoD (Level of Details)
+information, or not. But the output only shares the LoD information
+with the input Out (Inference).
 
-Both the input `Out` and `Label` can carry the LoD (Level of Details)
-information, or not. But the output only shares the LoD with input `Inference`.
 )DOC");
   }
 };
diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc
index 4288f300dd5b0464f2b4394cdb0b44f93060ae74..62190ebc217be49f549cedfb2de24b9d138fff48 100644
--- a/paddle/operators/conv_cudnn_op.cc
+++ b/paddle/operators/conv_cudnn_op.cc
@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
             "workspace is a section of GPU memory which will be "
             "allocated/freed each time the operator runs, larger "
             "workspace size can increase performance but also requires "
-            "better hardward. This size should be carefully setted.")
+            "better hardware. This size should be chosen carefully.")
         .SetDefault(4096);
   }
 };
diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc
index 55f69fb03ad69c94dc4ebb8edd651d84e06a5f46..312264ccd48d1405a247a2c864d9f5897c897bea 100644
--- a/paddle/operators/cos_sim_op.cc
+++ b/paddle/operators/cos_sim_op.cc
@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Cosine Similarity Operator.
 
-The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
+$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
 
-The input `X` and `Y` must have the same shape, except that the 1st dimension
-of input `Y` could be just 1 (different from input `X`), which will be
-broadcasted to match the shape of input `X` before computing their cosine
+The inputs X and Y must have the same shape, except that the 1st dimension
+of input Y could be just 1 (different from input X), which will be
+broadcast to match the shape of input X before computing their cosine
 similarity.
 
-Both the input `X` and `Y` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Y can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
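The cosine-similarity formula and the 1st-dimension broadcast of Y described above can be sanity-checked with a short standalone C++ sketch. This is an illustration only, not the Paddle kernel; the flat row-major std::vector layout and the CosSim/broadcast_y names are assumptions made for this example.

#include <cmath>
#include <vector>

// Computes out[i] = x_i . y_i / (|x_i| * |y_i|) for every row i. When
// broadcast_y is true, Y holds a single row that is reused against each
// row of X, mirroring the broadcast described in the operator comment.
std::vector<float> CosSim(const std::vector<float>& x,
                          const std::vector<float>& y, int rows, int cols,
                          bool broadcast_y) {
  std::vector<float> out(rows);
  for (int i = 0; i < rows; ++i) {
    const float* xi = x.data() + i * cols;
    const float* yi = y.data() + (broadcast_y ? 0 : i * cols);
    float dot = 0.f, xx = 0.f, yy = 0.f;
    for (int j = 0; j < cols; ++j) {
      dot += xi[j] * yi[j];
      xx += xi[j] * xi[j];
      yy += yi[j] * yi[j];
    }
    out[i] = dot / (std::sqrt(xx) * std::sqrt(yy));
  }
  return out;
}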
diff --git a/paddle/operators/crop_op.cc b/paddle/operators/crop_op.cc
index ed78e9e3a3a49b7ff0990b8d13cfe2dae594b722..6752eb8c1c72150b0b1cf5595211ca1d01ef2bf4 100644
--- a/paddle/operators/crop_op.cc
+++ b/paddle/operators/crop_op.cc
@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input of crop op. "
-             "The input should be a k-D tensor(k > 0 and k < 7)");
+             "The input should be a k-D tensor (k > 0 and k < 7).");
     AddInput("Y",
-             "The input used as reference for cropping"
-             " with the same dimension as X. ")
+             "The input used as reference for cropping, "
+             "which is of the same dimensions as X.")
         .AsDispensable();
     AddOutput("Out",
-              "The output of crop op "
-              "with the same dimension as X.");
+              "The output of crop op, "
+              "which is of the same dimensions as X.");
     AddAttr<std::vector<int>>("offsets",
-                              "A list describing offsets to be cropped."
-                              "The size of offsets list should be as same as "
-                              "dimension size of input X.");
+                              "A list describing offsets to be cropped. "
+                              "The size of offsets list should be the same as "
+                              "the dimension size of input X.");
     AddAttr<std::vector<int>>("shape",
-                              "A list describing the shape of output."
-                              "The size of shape list should be as same as "
-                              "dimension size of input X.")
+                              "A list describing the shape of output. "
+                              "The size of shape list should be the same as "
+                              "the dimension size of input X.")
         .SetDefault(std::vector<int>());
     AddComment(R"DOC(
 Crop Operator.
+
 Crop input into output, as specified by offsets and shape.
 
 There are two ways to set shape:
-1. referenc input: crop input X as shape as reference input.
+1. reference input: crop input X into the same shape as reference input.
                    The dimension of reference input should
-                   be as same as input X.
-2. shape list: crop input X by shape described by a list.
-               The size of shape list should be as same as
-               dimension size of input X.
+                   be the same as the dimension of input X.
+2. shape list: crop input X into the shape described by a list.
+               The size of shape list should be the same as
+               the dimension size of input X.
 
 The input should be a k-D tensor (k > 0 and k < 7). As an example:
@@ -91,20 +92,20 @@ Given:
 
     X = [[0, 1, 2, 0, 0]
          [0, 3, 4, 0, 0]
-         [0, 0, 0, 0, 0]]
+         [0, 0, 0, 0, 0]],
 
 and
 
-    offsets = [0, 1]
+    offsets = [0, 1],
 
 and
 
-    shape = [2, 2]
+    shape = [2, 2],
 
-then we get
+we get:
 
     Out = [[1, 2],
-           [3, 4]]
+           [3, 4]].
 
 )DOC");
   }
diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index 39df19da677a7dee7d0989d491f8d5511f73a9c7..3ed41933b15059ae566a233c0ca5298c7f60bd86 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -117,9 +117,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
         "Label",
         "(Tensor, default Tensor<int>), the ground truth which is "
         "a 2-D tensor. "
-        "When soft_label is set to false, `Label` is a Tensor with shape "
+        "When soft_label is set to false, Label is a Tensor with shape "
        "[N x 1]. "
-        "When soft_label is set to true, `Label` is a Tensor "
+        "When soft_label is set to true, Label is a Tensor "
        "with shape [N x K].");
     AddOutput("Y",
               "(Tensor, default Tensor<float>), a 2-D tensor "
@@ -137,13 +137,13 @@ computation.
 
 1) One-hot cross-entropy:
     soft_label = false, Label[i, 0] indicates the class index for sample i:
 
-                Y[i] = -log(X[i, Label[i]])
+                $Y[i] = -\log(X[i, Label[i]])$
 
 2) Soft-label cross-entropy:
     soft_label = true, Label[i, j] indicates the soft label of class j
     for sample i:
 
-                Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
+                $Y[i] = \sum_j{-Label[i, j] * \log(X[i, j])}$
 
   Please make sure that in this case the summation of each row of Label
   equals one.
@@ -153,8 +153,9 @@ computation.
     non-zero element (equals 1), soft-label cross-entropy degenerates to a
     one-hot cross-entropy with one-hot label representation.
 
-Both the input `X` and `Label` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Label can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
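The two cross-entropy modes documented above reduce to a few lines each. A minimal standalone sketch of both, not the Paddle kernel; the flat row-major layout and both function names are assumptions made for this example.

#include <cmath>
#include <vector>

// One-hot mode (soft_label = false): label[i] holds the class index of
// sample i, so Y[i] = -log(X[i, label[i]]).
std::vector<float> OneHotCrossEntropy(const std::vector<float>& x,
                                      const std::vector<int>& label, int n,
                                      int k) {
  std::vector<float> y(n);
  for (int i = 0; i < n; ++i) y[i] = -std::log(x[i * k + label[i]]);
  return y;
}

// Soft-label mode (soft_label = true): label[i * k + j] is the soft label
// of class j for sample i; each row of label is assumed to sum to one.
std::vector<float> SoftLabelCrossEntropy(const std::vector<float>& x,
                                         const std::vector<float>& label,
                                         int n, int k) {
  std::vector<float> y(n, 0.f);
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < k; ++j)
      y[i] -= label[i * k + j] * std::log(x[i * k + j]);
  return y;
}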
diff --git a/paddle/operators/decayed_adagrad_op.cc b/paddle/operators/decayed_adagrad_op.cc
index 17b394aa07cb0c7ca6e085b61590ff052221b22c..640b4e77448d1b64bcf7375f26c07ff1d2bdeaa3 100644
--- a/paddle/operators/decayed_adagrad_op.cc
+++ b/paddle/operators/decayed_adagrad_op.cc
@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
              "Constant for numerical stability")
         .SetDefault(1.0e-6f);
     AddComment(R"DOC(
+Decayed Adagrad Optimizer.
 
-Decayed Adagrad
+The update is done as follows:
 
-moment_out = decay * moment + (1 - decay) * grad * grad
-param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
+$$
+moment\_out = decay * moment + (1 - decay) * grad * grad \\
+param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
+$$
+
+The original paper (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+does not have an epsilon attribute. It is added here for numerical
+stability to avoid the division-by-zero error.
 
 )DOC");
   }
diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc
index ff1ccea3b94dcd55c372b707c2afeda874ed212e..818146aca766cb13b93fd024c11c1209655d9e11 100644
--- a/paddle/operators/dropout_op.cc
+++ b/paddle/operators/dropout_op.cc
@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
   DropoutOpMaker(framework::OpProto* proto,
                  framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
-        .SetDefault(.5f);
-    AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
-    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
     AddInput("X", "The input of dropout op.");
     AddOutput("Out", "The output of dropout op.");
     AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
+
+    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
+        .SetDefault(.5f);
+    AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
+    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
+
     AddComment(R"DOC(
 Dropout Operator.
 
-'Dropout' refers to randomly dropping out units in a nerual network. It is a
+Dropout refers to randomly dropping out units in a neural network. It is a
 regularization technique for reducing overfitting by preventing neuron
 co-adaptation during training. The dropout operator randomly sets (according to
 the given dropout probability) the outputs of some units to zero, while others
-being set to their inputs.
+are set equal to their corresponding inputs.
+
 )DOC");
   }
 };
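The decayed Adagrad update a few hunks above is just an element-wise loop. A minimal standalone sketch, assuming plain std::vector parameters rather than Paddle tensors:

#include <cmath>
#include <vector>

void DecayedAdagradUpdate(std::vector<float>& param,
                          std::vector<float>& moment,
                          const std::vector<float>& grad,
                          float learning_rate, float decay, float epsilon) {
  for (size_t i = 0; i < param.size(); ++i) {
    // moment_out = decay * moment + (1 - decay) * grad * grad
    moment[i] = decay * moment[i] + (1.f - decay) * grad[i] * grad[i];
    // param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
    param[i] -= learning_rate * grad[i] / (std::sqrt(moment[i]) + epsilon);
  }
}

Likewise, the dropout forward pass described directly above can be sketched in a few lines. Again an illustration under assumed signatures, not the Paddle kernel; note it applies no rescaling, matching the operator comment.

#include <cstdint>
#include <random>
#include <vector>

// Training-phase forward pass: each unit is zeroed with probability
// dropout_prob, and surviving units pass through unchanged. The sampled
// mask is returned so a backward pass could reuse it.
void DropoutForward(const std::vector<float>& x, float dropout_prob,
                    uint64_t seed, std::vector<float>* out,
                    std::vector<float>* mask) {
  std::mt19937_64 rng(seed);
  std::uniform_real_distribution<float> uniform(0.f, 1.f);
  out->resize(x.size());
  mask->resize(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    (*mask)[i] = uniform(rng) < dropout_prob ? 0.f : 1.f;
    (*out)[i] = x[i] * (*mask)[i];
  }
}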
diff --git a/paddle/operators/dynamic_recurrent_op.cc b/paddle/operators/dynamic_recurrent_op.cc
index a0b06ac1dc305bc899f9abaafcc980a6150ecda9..d48cc4e8df587708ab93e7d788145adc01c1d3e5 100644
--- a/paddle/operators/dynamic_recurrent_op.cc
+++ b/paddle/operators/dynamic_recurrent_op.cc
@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
         RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
     // inputs and outputs stored in proto
     AddInput(name.inlinks,
-             "the inputs that need to be segmented for each step.")
+             "The inputs that need to be segmented for each step.")
         .AsDuplicable();
-    AddInput(name.initial_states, "variables to initialize states.")
+    AddInput(name.initial_states, "Variables to initialize the states.")
         .AsDuplicable();
 
-    AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
+    AddOutput(name.outlinks,
+              "The outputs that need to be concatenated for all steps.")
         .AsDuplicable();
     AddOutput(name.step_scopes, "step scopes");
 
@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
     AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
     AddAttr<std::vector<std::string>>(name.states, "names of states");
 
-    AddComment("This is a RNN operator for varience-length sequences.");
+    AddComment(R"DOC(
+Dynamic Recurrent Operator.
+
+This is an RNN operator for variable-length sequences.
+
+)DOC");
   }
 };
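As context for the inlinks wording above, here is a toy sketch of what "segmented for each step" means for a variable-length batch. The nested-vector representation and the SegmentByStep name are assumptions made only for this illustration; the real operator works on LoD tensors and per-step scopes.

#include <algorithm>
#include <cstddef>
#include <vector>

// For each time step t, gather the t-th element of every sequence that is
// at least t + 1 long: step 0 sees all sequences, and later steps shrink
// as shorter sequences finish.
std::vector<std::vector<float>> SegmentByStep(
    const std::vector<std::vector<float>>& sequences) {
  std::size_t max_len = 0;
  for (const auto& seq : sequences) max_len = std::max(max_len, seq.size());
  std::vector<std::vector<float>> steps(max_len);
  for (std::size_t t = 0; t < max_len; ++t)
    for (const auto& seq : sequences)
      if (t < seq.size()) steps[t].push_back(seq[t]);
  return steps;
}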