提交 cb0118f3 编写于 作者: K kexinzhao 提交者: Yi Wang

Polish Operator Doc (m) (#5375)

* fix m_ops

* fix activation op
上级 5d8cdf20
......@@ -44,7 +44,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Sigmoid operator");
AddOutput("Y", "Output of Sigmoid operator");
AddComment(R"DOC(
Sigmoid activation operator.
Sigmoid Activation Operator.
$y = 1 / (1 + e^{-x})$
......@@ -60,7 +60,7 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of LogSigmoid operator");
AddOutput("Y", "Output of LogSigmoid operator");
AddComment(R"DOC(
Logsigmoid activation operator.
Logsigmoid Activation Operator.
$y = \log(1 / (1 + e^{-x}))$
......@@ -75,7 +75,7 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Exp operator");
AddOutput("Y", "Output of Exp operator");
AddComment(R"DOC(
Exp activation operator.
Exp Activation Operator.
$y = e^x$
......@@ -90,7 +90,7 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Relu operator");
AddOutput("Y", "Output of Relu operator");
AddComment(R"DOC(
Relu activation operator.
Relu Activation Operator.
$y = \max(x, 0)$
......@@ -109,7 +109,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("alpha", "The small negative slope")
.SetDefault(static_cast<AttrType>(0.02f));
AddComment(R"DOC(
LeakyRelu activation operator.
LeakyRelu Activation Operator.
$y = \max(x, \alpha * x)$
......@@ -128,7 +128,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("lambda", "non-negative offset")
.SetDefault(static_cast<AttrType>(0.5f));
AddComment(R"DOC(
Softshrink activation operator.
Softshrink Activation Operator.
$$
y = \begin{cases}
......@@ -149,7 +149,7 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Tanh operator");
AddOutput("Y", "Output of Tanh operator");
AddComment(R"DOC(
Tanh activation operator.
Tanh Activation Operator.
$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
......@@ -165,7 +165,7 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of TanhShrink operator");
AddOutput("Y", "Output of TanhShrink operator");
AddComment(R"DOC(
TanhShrink activation operator.
TanhShrink Activation Operator.
$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
......@@ -184,7 +184,7 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The value of threshold for HardShrink")
.SetDefault(static_cast<AttrType>(0.5));
AddComment(R"DOC(
HardShrink activation operator.
HardShrink Activation Operator.
$$
y = \begin{cases}
......@@ -205,7 +205,7 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Sqrt operator");
AddOutput("Y", "Output of Sqrt operator");
AddComment(R"DOC(
Sqrt activation operator.
Sqrt Activation Operator.
$y = \sqrt{x}$
......@@ -220,7 +220,7 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Abs operator");
AddOutput("Y", "Output of Abs operator");
AddComment(R"DOC(
Abs activation operator.
Abs Activation Operator.
$y = |x|$
......@@ -236,7 +236,7 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Reciprocal operator");
AddOutput("Y", "Output of Reciprocal operator");
AddComment(R"DOC(
Reciprocal activation operator.
Reciprocal Activation Operator.
$$y = \frac{1}{x}$$
......@@ -251,7 +251,7 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Log operator");
AddOutput("Y", "Output of Log operator");
AddComment(R"DOC(
Log activation operator.
Log Activation Operator.
$y = \ln(x)$
......@@ -268,7 +268,7 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Square operator");
AddOutput("Y", "Output of Square operator");
AddComment(R"DOC(
Square activation operator.
Square Activation Operator.
$y = x^2$
......@@ -284,7 +284,7 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Softplus operator");
AddOutput("Y", "Output of Softplus operator");
AddComment(R"DOC(
Softplus activation operator.
Softplus Activation Operator.
$y = \ln(1 + e^{x})$
......@@ -300,7 +300,7 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Softsign operator");
AddOutput("Y", "Output of Softsign operator");
AddComment(R"DOC(
Softsign activation operator.
Softsign Activation Operator.
$$y = \frac{x}{1 + |x|}$$
......@@ -320,7 +320,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
.SetDefault(static_cast<AttrType>(24));
AddComment(R"DOC(
BRelu activation operator.
BRelu Activation Operator.
$y = \max(\min(x, t_{min}), t_{max})$
......@@ -339,7 +339,7 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
.SetDefault(static_cast<AttrType>(40));
AddComment(R"DOC(
SoftRelu activation operator.
SoftRelu Activation Operator.
$y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$
......@@ -357,7 +357,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("alpha", "The alpha value of ELU")
.SetDefault(static_cast<AttrType>(1.0f));
AddComment(R"DOC(
ELU activation operator.
ELU Activation Operator.
Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1511.07289.
......@@ -378,7 +378,7 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold value of Relu6")
.SetDefault(static_cast<AttrType>(6));
AddComment(R"DOC(
Relu6 activation operator.
Relu6 Activation Operator.
$y = \min(\max(0, x), 6)$
......@@ -396,7 +396,7 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("factor", "The exponential factor of Pow")
.SetDefault(static_cast<AttrType>(1));
AddComment(R"DOC(
Pow activation operator.
Pow Activation Operator.
$y = x^{factor}$
......@@ -416,7 +416,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("scale_b", "The scale parameter of b for the input")
.SetDefault(static_cast<AttrType>(1.7159));
AddComment(R"DOC(
STanh activation operator.
STanh Activation Operator.
$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
......@@ -435,7 +435,7 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold location of activation")
.SetDefault(static_cast<AttrType>(1.0));
AddComment(R"DOC(
ThresholdedRelu activation operator.
ThresholdedRelu Activation Operator.
$$
y = \begin{cases}
......@@ -461,7 +461,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
.SetDefault(static_cast<AttrType>(0.5));
AddComment(R"DOC(
HardSigmoid activation operator.
HardSigmoid Activation Operator.
Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391),
which is much faster than sigmoid.
......
......@@ -55,8 +55,6 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
"(2-D tensor with shape [batch_size x 1]) "
"The label indicating X1 ranked higher than X2 or not, "
"can only be +1 or -1.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddOutput("Activated",
"(2-D tensor with shape [batch_size x 1]) Intermediate tensor "
"to indicate whether each element of Output(Out) is activated.")
......@@ -64,23 +62,26 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out",
"(2-D tensor with shape [batch_size x 1]) "
"The output loss of MarginRankLoss operator.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddComment(R"DOC(
MarginRankLoss Operator.
MarginRankLoss operator measures the loss given a pair of training sample
This operator measures the loss given a pair of training sample
{`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1`
indicating X1 is ranked higher than `X2`, otherwise `Label = -1`. The loss
turns out
indicating X1 is ranked higher than `X2` and `Label = -1` otherwise. The loss
is calculated as:
loss(X1, X2, Label) = max(0, -Label * (X1 - X2) + margin).
$loss(X1, X2, Label) = \max(0, -Label * (X1 - X2) + margin)$
The attribute `margin` involved here helps make the predictions more robust.
The attribute `margin` here helps make the predictions more robust.
Denote the item ranked higher as the positive sample, otherwise the negative
sample. If the score of the two samples satisfies
positive sample - negative sample < margin,
$positive sample - negative sample < margin$
the pair of samples will contribute to the final loss, which will backpropogate
and train the ranking model to enlarge the difference of the two score.
the pair of samples will contribute to the final loss, which will backpropagate
and train the ranking model to enlarge the difference between the two scores.
For batch input with size `batch_size`, `X1`, `X2` and `Label`
all have the same shape [batch_size x 1].
......
......@@ -144,7 +144,10 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC")
.SetDefault(false);
AddComment(R"DOC(
The MatMul operator is used to perform (batched) matrix multiplication
MatMul Operator.
This operator is used to perform (batched) matrix multiplication
over the last two dimensions of the input tensors `X` and `Y`.
If a transpose flag is specified, the last two dimensions of the
......@@ -166,7 +169,8 @@ The differences are:
- We add `transpose_X` and `transpose_Y` flags.
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
or not. But the output only shares the LoD information with input `X`.
)DOC");
}
};
......
......@@ -36,7 +36,11 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op");
AddComment(R"DOC( Mean Operator
AddComment(R"DOC(
Mean Operator.
Out is a scalar which is the mean of all elements in X.
)DOC");
}
};
......
......@@ -52,14 +52,16 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Y", "The right tensor of minus operator.");
AddOutput("Out", "The output tensor of minus operator.");
AddComment(R"DOC(Minus Operator
AddComment(R"DOC(
Minus Operator.
Equation:
Out = X - Y
$Out = X - Y$
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
or not. But the output only shares the LoD information with input `X`.
)DOC");
}
};
......
......@@ -43,27 +43,35 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input tensor of modified huber loss op."
"The input tensor of modified huber loss op. "
"X is 2-D tensor with shape [batch_size, 1].");
AddInput("Y",
"The target labels of modified huber loss op."
"The shape of Y is same as X. Values of Y must be 0 or 1.");
"The target labels of modified huber loss op. "
"The shape of Y is the same as X. Values of Y must be 0 or 1.");
AddOutput("IntermediateVal",
"Variable to save intermediate result which will be reused in "
"backward processing.")
.AsIntermediate();
AddOutput("Out", "Classification loss for X.");
AddComment(R"DOC(
Modified huber loss is used in binary classification problem. The shape of
input X and target Y are both [N, 1] and so is the shape of output loss.
Since target Y is not differentiable, cacluating gradient for Y is illegal.
The formulation of modified huber loss is:
L(y, f(x)) = max(0, 1 - yf(x))^2 for yf(x) >= -1,
-4yf(x) otherwise.
Make sure the values of target label Y are in {0, 1} here. The operator will
Modified Huber Loss Operator.
This operator is used in binary classification problem. The shape of
input X and target Y are both [N, 1] and so is the shape of the output loss.
Since target Y is not differentiable, calculating gradient for Y is illegal.
The formula of modified huber loss is:
$$
L(y, f(x)) =
\begin{cases}
(\max(0, 1 - yf(x)))^2, \text{if} \ yf(x) >= -1 \\
-4yf(x), \quad \text{otherwise}
\end{cases}
$$
Make sure the values of target label Y are in {0, 1} here. This operator will
scale values of Y to {-1, +1} when computing losses and gradients.
)DOC");
}
};
......
......@@ -75,17 +75,23 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("VelocityOut", "(Tensor) Output updated velocity");
AddAttr<float>("mu", "(float) Momentum coefficient");
AddAttr<bool>("useNesterov", "(bool) Use Nesterov Momentum")
AddAttr<bool>("useNesterov",
"(bool, default false) "
"Use Nesterov Momentum")
.SetDefault(false);
AddComment(R"DOC(
Momentum Algorithm with a flag for Nestrov Moemntum (momentum).
velocity = mu * velocity + gradient
if (use_nesterov):
param = param - gradient * learning_rate + mu * velocity * learning_rate
else:
param = param - learning_rate * velocity
Momentum Optimizer.
This optimizer has a flag for Nestrov Momentum.
The update equations are as follows:
$$
velocity = mu * velocity + gradient \\
if (use\_nesterov): \\
param = param - gradient * learning\_rate + mu * velocity * learning\_rate \\
else: \\
param = param - learning\_rate * velocity. \\
$$
)DOC");
}
......
......@@ -78,6 +78,7 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "The output of mul op");
AddAttr<int>(
"x_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
in that case, tensors will be reshaped to a matrix. The matrix's first
dimension(column length) will be the product of tensor's last
......@@ -88,20 +89,24 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
.EqualGreaterThan(1);
AddAttr<int>(
"y_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
in that case, tensors will be reshaped to a matrix. Just like input `X`.
)DOC")
.SetDefault(1)
.EqualGreaterThan(1);
AddComment(R"DOC(
Mul operator is used to perform matrix multiplication for input X and Y.
Mul Operator.
This operator is used to perform matrix multiplication for input X and Y.
The equation is:
Out = X * Y
$$Out = X * Y$$
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
or not. But the output only shares the LoD information with input `X`.
)DOC");
}
};
......
......@@ -66,7 +66,8 @@ class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "The candidate tensors of multiplex operator.")
.AsDuplicable();
AddOutput("Out", "The output tensor of multiplex operator.");
AddComment(R"DOC(Multiplex operator
AddComment(R"DOC(
Multiplex Operator.
Multiplex multiple tensors according to the index provided by the index tensor.
......@@ -77,10 +78,11 @@ the (Ids[i])-th tensor.
For i-th row of the output tensor:
y[i] = x_{k}[i]
$$y[i] = x_{k}[i]$$
where y is the output tensor. `x_{k}` is the k-th input tensor
where `y` is the output tensor, `x_{k}` is the k-th input tensor,
and `k = Ids[i]`.
)DOC");
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册