Unverified Commit 7408a4c4 authored by kexinzhao, committed by GitHub

Merge pull request #5354 from kexinzhao/cos_sim_to_dynamic_recur_op

polish operator doc
......@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label");
// Assume indices has same shape as inference, because
// it's the output of topk.
PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
......@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
// TODO(typhoonzero): support both inference value and indices.
AddInput("Out", "topk (inferences) the network output");
AddInput("Indices", "topk (indices) the network output");
AddInput("Out", "The network output of topk (inferences)");
AddInput("Indices", "The the network output of topk (indices)");
AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch");
AddComment(R"DOC(
Accuracy Operator.
It will print the accuracy rate for classification.
The accuracy is calculated as follows:
$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
Both the input Out and Label can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD information
with the input Out (Inference).
)DOC");
}
};
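A minimal standalone sketch of the formula above (hypothetical code, not the operator's actual kernel; the function and variable names are made up for illustration) counts a sample as correct when its label appears among its top-k indices:

```cpp
#include <cstdio>
#include <vector>

// Hypothetical sketch: accuracy = NumOfCorrectPredicts / NumOfAllSamples,
// where a sample is correct if its label appears in its top-k indices.
float Accuracy(const std::vector<std::vector<int>>& topk_indices,
               const std::vector<int>& labels) {
  int correct = 0;
  for (size_t i = 0; i < labels.size(); ++i) {
    for (int idx : topk_indices[i]) {
      if (idx == labels[i]) {
        ++correct;
        break;
      }
    }
  }
  return static_cast<float>(correct) / labels.size();
}

int main() {
  // Two of the three samples have their label in the top-2 predictions.
  std::vector<std::vector<int>> indices = {{1, 3}, {0, 2}, {4, 1}};
  std::vector<int> labels = {3, 2, 0};
  std::printf("accuracy = %.3f\n", Accuracy(indices, labels));  // 0.667
}
```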
......
......@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
}
};
......
......@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Cosine Similarity Operator.
$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
The input X and Y must have the same shape, except that the 1st dimension
of input Y could be just 1 (different from input X), which will be
broadcasted to match the shape of input X before computing their cosine
similarity.
Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};
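As a minimal sketch of the formula above for a single pair of vectors (hypothetical standalone code, not the operator's implementation, which works row-wise on tensors and handles broadcasting):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical sketch: Out = x.y / (|x| * |y|) for two equal-length vectors.
double CosineSimilarity(const std::vector<double>& x,
                        const std::vector<double>& y) {
  double dot = 0.0, xx = 0.0, yy = 0.0;
  for (size_t i = 0; i < x.size(); ++i) {
    dot += x[i] * y[i];
    xx += x[i] * x[i];
    yy += y[i] * y[i];
  }
  return dot / (std::sqrt(xx) * std::sqrt(yy));
}

int main() {
  std::vector<double> x = {1.0, 2.0, 3.0};
  std::vector<double> y = {2.0, 4.0, 6.0};  // parallel to x
  std::printf("%.3f\n", CosineSimilarity(x, y));  // 1.000
}
```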
......
......@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)");
"The input should be a k-D tensor(k > 0 and k < 7).");
AddInput("Y",
"The input used as reference for cropping"
" with the same dimension as X. ")
"The input used as reference for cropping, "
"which is of the same dimensions as X.")
.AsDispensable();
AddOutput("Out",
"The output of crop op "
"with the same dimension as X.");
"The output of crop op, "
"which is of the same dimensions as X.");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
"A list<int> describing offsets to be cropped. "
"The size of offsets list should be the same as "
"the dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
"A list<int> describing the shape of output. "
"The size of shape list should be the same as "
"the dimension size of input X.")
.SetDefault(std::vector<int>());
AddComment(R"DOC(
Crop Operator.
Crop input into output, as specified by offsets and shape.
There are two ways to set shape:
1. reference input: crop input X into the same shape as reference input.
The dimension of reference input should
be the same as the dimension of input X.
2. shape list: crop input X into the shape described by a list<int>.
The size of shape list should be the same as
the dimension size of input X.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
......@@ -91,20 +92,20 @@ Given:
X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]],
and
offsets = [0, 1],
and
shape = [2, 2],
we get:
Out = [[1, 2],
[3, 4]].
)DOC");
}
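The worked example above can be reproduced with a short 2-D sketch (hypothetical code; Crop, offsets, and shape here are illustrative names, not the operator's real kernel, which supports up to 6-D tensors):

```cpp
#include <cstdio>
#include <vector>

using Matrix = std::vector<std::vector<int>>;

// Hypothetical 2-D sketch of the crop rule:
// Out[i][j] = X[i + offsets[0]][j + offsets[1]] for the requested shape.
Matrix Crop(const Matrix& x, const std::vector<int>& offsets,
            const std::vector<int>& shape) {
  Matrix out(shape[0], std::vector<int>(shape[1]));
  for (int i = 0; i < shape[0]; ++i)
    for (int j = 0; j < shape[1]; ++j)
      out[i][j] = x[i + offsets[0]][j + offsets[1]];
  return out;
}

int main() {
  Matrix x = {{0, 1, 2, 0, 0}, {0, 3, 4, 0, 0}, {0, 0, 0, 0, 0}};
  Matrix out = Crop(x, {0, 1}, {2, 2});
  for (const auto& row : out) {
    for (int v : row) std::printf("%d ", v);
    std::printf("\n");  // prints "1 2" then "3 4"
  }
}
```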
......
......@@ -117,9 +117,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"Label",
"(Tensor, default Tensor<int>), the ground truth which is "
"a 2-D tensor. "
"When soft_label is set to false, Label is a Tensor<int> with shape "
"[N x 1]. "
"When soft_label is set to true, Label is a Tensor<float/double> "
"with shape [N x K].");
AddOutput("Y",
"(Tensor, default Tensor<float>), a 2-D tensor "
......@@ -137,13 +137,13 @@ computation.
1) One-hot cross-entropy:
soft_label = false, Label[i, 0] indicates the class index for sample i:
$Y[i] = -\log(X[i, Label[i]])$
2) Soft-label cross-entropy:
soft_label = true, Label[i, j] indicates the soft label of class j
for sample i:
$Y[i] = \sum_j{-Label[i, j] * \log(X[i, j])}$
Please make sure that in this case the summation of each row of Label
equals one.
......@@ -153,8 +153,9 @@ computation.
non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation.
Both the input X and Label can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};
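Both modes above can be sketched for a single sample of predicted probabilities (hypothetical standalone code, not the operator's kernel):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical sketch of the two modes for one sample:
// one-hot:    Y = -log(X[label])
// soft-label: Y = -sum_j Label[j] * log(X[j])
double OneHotCrossEntropy(const std::vector<double>& x, int label) {
  return -std::log(x[label]);
}

double SoftLabelCrossEntropy(const std::vector<double>& x,
                             const std::vector<double>& label) {
  double y = 0.0;
  for (size_t j = 0; j < x.size(); ++j) y -= label[j] * std::log(x[j]);
  return y;
}

int main() {
  std::vector<double> x = {0.1, 0.7, 0.2};  // one row of probabilities
  // With a one-hot soft label, both modes give the same value: -log(0.7).
  std::printf("%.4f\n", OneHotCrossEntropy(x, 1));
  std::printf("%.4f\n", SoftLabelCrossEntropy(x, {0.0, 1.0, 0.0}));
}
```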
......
......@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Decayed Adagrad Optimizer.
The update is done as follows:
$$
moment\_out = decay * moment + (1 - decay) * grad * grad \\
param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
$$
The original paper (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid the division by zero error.
)DOC");
}
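A minimal element-wise sketch of this update rule (hypothetical code, not the operator's implementation):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical sketch of one decayed-Adagrad step, element by element.
void DecayedAdagradUpdate(std::vector<double>& param,
                          std::vector<double>& moment,
                          const std::vector<double>& grad,
                          double learning_rate, double decay, double epsilon) {
  for (size_t i = 0; i < param.size(); ++i) {
    moment[i] = decay * moment[i] + (1.0 - decay) * grad[i] * grad[i];
    param[i] -= learning_rate * grad[i] / (std::sqrt(moment[i]) + epsilon);
  }
}

int main() {
  std::vector<double> param = {1.0}, moment = {0.0}, grad = {0.5};
  DecayedAdagradUpdate(param, moment, grad, 0.1, 0.95, 1.0e-6);
  // moment = 0.05 * 0.25 = 0.0125; param = 1 - 0.05 / sqrt(0.0125) ~ 0.5528
  std::printf("param = %.4f\n", param[0]);
}
```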
......
......@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
DropoutOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op.");
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddComment(R"DOC(
Dropout Operator.
Dropout refers to randomly dropping out units in a neural network. It is a
regularization technique for reducing overfitting by preventing neuron
co-adaptation during training. The dropout operator randomly sets (according to
the given dropout probability) the outputs of some units to zero, while others
are set equal to their corresponding inputs.
)DOC");
}
};
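A minimal sketch of the training-phase behavior described above (hypothetical standalone code; the real operator works on tensors and also has an inference path):

```cpp
#include <cstdio>
#include <random>
#include <vector>

// Hypothetical sketch: each unit is zeroed with probability dropout_prob;
// the sampled 0/1 mask is returned alongside the output.
void Dropout(const std::vector<float>& x, float dropout_prob, unsigned seed,
             std::vector<float>* out, std::vector<float>* mask) {
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  out->resize(x.size());
  mask->resize(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    (*mask)[i] = uniform(rng) < dropout_prob ? 0.0f : 1.0f;
    (*out)[i] = x[i] * (*mask)[i];
  }
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f}, out, mask;
  Dropout(x, 0.5f, 0, &out, &mask);
  for (float v : out) std::printf("%.1f ", v);  // each x[i] kept or zeroed
  std::printf("\n");
}
```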
......
......@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
// inputs and outputs stored in proto
AddInput(name.inlinks,
"the inputs that need to be segmented for each step.")
"The inputs that need to be segmented for each step.")
.AsDuplicable();
AddInput(name.initial_states, "variables to initialize states.")
AddInput(name.initial_states, "Variables to initialize the states.")
.AsDuplicable();
AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
AddOutput(name.outlinks,
"The outputs that need to be concatenated for all steps.")
.AsDuplicable();
AddOutput(name.step_scopes, "step scopes");
......@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
AddAttr<std::vector<std::string>>(name.states, "names of states");
AddComment("This is a RNN operator for varience-length sequences.");
AddComment(R"DOC(
Dynamic Recurrent Operator.
This is an RNN operator for variable-length sequences.
)DOC");
}
};
......