diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 90f1535fcd387c34ea39d84d9c2ec78fcbc3c764..483f9888973edc9db6317723c136778d40cc7878 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -43,7 +43,12 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sigmoid operator"); AddOutput("Y", "Output of Sigmoid operator"); - AddComment("Sigmoid activation operator, sigmoid = 1 / (1 + exp(-x))"); + AddComment(R"DOC( +Sigmoid activation operator. + +$y = 1 / (1 + e^{-x})$ + +)DOC"); } }; @@ -54,8 +59,12 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LogSigmoid operator"); AddOutput("Y", "Output of LogSigmoid operator"); - AddComment( - "Logsigmoid activation operator, logsigmoid = log (1 / (1 + exp(-x)))"); + AddComment(R"DOC( +Logsigmoid activation operator. + +$y = \log(1 / (1 + e^{-x}))$ + +)DOC"); } }; @@ -65,7 +74,12 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Exp operator"); AddOutput("Y", "Output of Exp operator"); - AddComment("Exp activation operator, exp(x) = e^x"); + AddComment(R"DOC( +Exp activation operator. + +$y = e^x$ + +)DOC"); } }; @@ -75,7 +89,12 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu operator"); AddOutput("Y", "Output of Relu operator"); - AddComment("Relu activation operator, relu(x) = max(x, 0)"); + AddComment(R"DOC( +Relu activation operator. + +$y = \max(x, 0)$ + +)DOC"); } }; @@ -87,11 +106,14 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LeakyRelu operator"); AddOutput("Y", "Output of LeakyRelu operator"); - AddComment( - "LeakyRelu activation operator, " - "leaky_relu = max(x, alpha * x)"); AddAttr("alpha", "The small negative slope") .SetDefault(static_cast(0.02f)); + AddComment(R"DOC( +LeakyRelu activation operator. + +$y = \max(x, \alpha * x)$ + +)DOC"); } }; @@ -103,12 +125,20 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softshrink operator"); AddOutput("Y", "Output of Softshrink operator"); - AddComment( - "Softshrink activation operator, " - "softshrink = x - lambda, if x > lambda;" - " x + lambda, if x < lambda; 0 otherwise"); AddAttr("lambda", "non-negative offset") .SetDefault(static_cast(0.5f)); + AddComment(R"DOC( +Softshrink activation operator. + +$$ +y = \begin{cases} + x - \lambda, \text{if } x > \lambda \\ + x + \lambda, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ + +)DOC"); } }; @@ -118,9 +148,12 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Tanh operator"); AddOutput("Y", "Output of Tanh operator"); - AddComment( - "Tanh activation operator, tanh = (exp(x) - exp(-x)) / (exp(x) + " - "exp(-x))"); + AddComment(R"DOC( +Tanh activation operator. + +$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ + +)DOC"); } }; @@ -131,7 +164,12 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of TanhShrink operator"); AddOutput("Y", "Output of TanhShrink operator"); - AddComment("TanhShrink activation operator, tanhshrink(x) = x - tanh(x)"); + AddComment(R"DOC( +TanhShrink activation operator. + +$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ + +)DOC"); } }; @@ -143,13 +181,20 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardShrink operator"); AddOutput("Y", "Output of HardShrink operator"); - AddComment( - "HardShrink activation operator, " - "hard_shrink(x) = x if x > lambda" - "hard_shrink(x) = x if x < -lambda" - "hard_shrink(x) = 0 otherwise"); AddAttr("threshold", "The value of threshold for HardShrink") .SetDefault(static_cast(0.5)); + AddComment(R"DOC( +HardShrink activation operator. + +$$ +y = \begin{cases} + x, \text{if } x > \lambda \\ + x, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ + +)DOC"); } }; @@ -159,7 +204,12 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sqrt operator"); AddOutput("Y", "Output of Sqrt operator"); - AddComment("Sqrt activation operator, sqrt(x) = x^(1/2)"); + AddComment(R"DOC( +Sqrt activation operator. + +$y = \sqrt{x}$ + +)DOC"); } }; @@ -169,7 +219,12 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Abs operator"); AddOutput("Y", "Output of Abs operator"); - AddComment("Abs activation operator, abs(x) = |x|"); + AddComment(R"DOC( +Abs activation operator. + +$y = |x|$ + +)DOC"); } }; @@ -180,7 +235,12 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Reciprocal operator"); AddOutput("Y", "Output of Reciprocal operator"); - AddComment("Reciprocal activation operator, reciprocal(x) = 1 / x"); + AddComment(R"DOC( +Reciprocal activation operator. + +$$y = \frac{1}{x}$$ + +)DOC"); } }; @@ -190,7 +250,14 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Log operator"); AddOutput("Y", "Output of Log operator"); - AddComment("Log activation operator, log(x) = natural logarithm of x"); + AddComment(R"DOC( +Log activation operator. + +$y = \ln(x)$ + +Natural logarithm of x. + +)DOC"); } }; @@ -200,7 +267,12 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Square operator"); AddOutput("Y", "Output of Square operator"); - AddComment("Square activation operator, square(x) = x^2"); + AddComment(R"DOC( +Square activation operator. + +$y = x^2$ + +)DOC"); } }; @@ -211,7 +283,12 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softplus operator"); AddOutput("Y", "Output of Softplus operator"); - AddComment("Softplus activation operator, softplus(x) = log(1 + exp(x))"); + AddComment(R"DOC( +Softplus activation operator. + +$y = \ln(1 + e^{x})$ + +)DOC"); } }; @@ -222,7 +299,12 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softsign operator"); AddOutput("Y", "Output of Softsign operator"); - AddComment("Softsign activation operator, softsign(x) = x / (1 + |x|)"); + AddComment(R"DOC( +Softsign activation operator. + +$$y = \frac{x}{1 + |x|}$$ + +)DOC"); } }; @@ -233,11 +315,16 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of BRelu operator"); AddOutput("Y", "Output of BRelu operator"); - AddComment("BRelu activation operator, brelu = max(min(x, t_min), t_max)"); AddAttr("t_min", "The min marginal value of BRelu") .SetDefault(static_cast(0)); AddAttr("t_max", "The max marginal value of BRelu") .SetDefault(static_cast(24)); + AddComment(R"DOC( +BRelu activation operator. + +$y = \max(\min(x, t_{min}), t_{max})$ + +)DOC"); } }; @@ -249,11 +336,14 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of SoftRelu operator"); AddOutput("Y", "Output of SoftRelu operator"); - AddComment( - "SoftRelu activation operator, soft_relu = log(1 + exp(max(min(x, " - "threshold), threshold)))"); AddAttr("threshold", "The threshold value of SoftRelu") .SetDefault(static_cast(40)); + AddComment(R"DOC( +SoftRelu activation operator. + +$y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ + +)DOC"); } }; @@ -262,19 +352,19 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker { public: ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", - "(Tensor) The input of ELU operator, it shouldn't be empty. Input " - "is flattened and treated as a 1D array."); - AddOutput("Y", - "(Tensor) The output of ELU operator. It has the same shape as " - "the input."); - AddAttr( - "alpha", "(float, default 1.0) Alpha value in the elu formulation.") - .SetDefault(static_cast(1.)); + AddInput("X", "Input of ELU operator"); + AddOutput("Y", "Output of ELU operator"); + AddAttr("alpha", "The alpha value of ELU") + .SetDefault(static_cast(1.0f)); AddComment(R"DOC( - ELU activation operator. It applies this element-wise computation on - the input: f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)). - Check .. _Link: https://arxiv.org/abs/1511.07289 for more details.)DOC"); +ELU activation operator. + +Applies the following element-wise computation on the input according to +https://arxiv.org/abs/1511.07289. + +$y = \max(0, x) + \min(0, \alpha * (e^x - 1))$ + +)DOC"); } }; @@ -285,9 +375,14 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu6 operator"); AddOutput("Y", "Output of Relu6 operator"); - AddComment("Relu6 activation operator, relu6 = min(max(0, x), 6)"); AddAttr("threshold", "The threshold value of Relu6") .SetDefault(static_cast(6)); + AddComment(R"DOC( +Relu6 activation operator. + +$y = \min(\max(0, x), 6)$ + +)DOC"); } }; @@ -298,9 +393,14 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Pow operator"); AddOutput("Y", "Output of Pow operator"); - AddComment("Pow activation operator, pow(x, factor) = x^factor"); AddAttr("factor", "The exponential factor of Pow") .SetDefault(static_cast(1)); + AddComment(R"DOC( +Pow activation operator. + +$y = x^{factor}$ + +)DOC"); } }; @@ -311,11 +411,16 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of STanh operator"); AddOutput("Y", "Output of STanh operator"); - AddComment("STanh activation operator, stanh = b * tanh(a * x)"); AddAttr("scale_a", "The scale parameter of a for the input") .SetDefault(static_cast(2 / 3)); AddAttr("scale_b", "The scale parameter of b for the input") .SetDefault(static_cast(1.7159)); + AddComment(R"DOC( +STanh activation operator. + +$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ + +)DOC"); } }; @@ -327,12 +432,19 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ThresholdedRelu operator"); AddOutput("Y", "Output of ThresholdedRelu operator"); - AddComment( - "ThresholdedRelu activation operator, " - "thresholded_relu = x for x > threshold, " - "thresholded_relu = 0 otherwise."); AddAttr("threshold", "The threshold location of activation") .SetDefault(static_cast(1.0)); + AddComment(R"DOC( +ThresholdedRelu activation operator. + +$$ +y = \begin{cases} + x, \text{if } x > threshold \\ + 0, \text{otherwise} + \end{cases} +$$ + +)DOC"); } }; @@ -344,27 +456,23 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardSigmoid operator"); AddOutput("Y", "Output of HardSigmoid operator"); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(static_cast(0.2)); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(static_cast(0.5)); AddComment(R"DOC( -Hard Sigmoid activation operator. +HardSigmoid activation operator. -Segment-wise linear approximation of sigmoid[1]. -This is much faster than sigmoid. +Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), +which is much faster than sigmoid. -hard_sigmoid = max(0, min(1, slope * x + shift)) +$y = \max(0, \min(1, slope * x + shift))$ The slope should be positive. The offset can be either positive or negative. -The default slope and shift are set from [1]. +The default slope and shift are set according to the above reference. It is recommended to use the defaults for this activation. -References: - [1] Noisy Activation Functions - (https://arxiv.org/abs/1603.00391) - - )DOC"); - AddAttr("slope", "Slope for linear approximation of sigmoid") - .SetDefault(static_cast(0.2)); - AddAttr("offset", "Offset for linear approximation of sigmoid") - .SetDefault(static_cast(0.5)); +)DOC"); } }; diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index ddd966e26c9abad0d83f8b5c6e3e7d9ad65158a8..ceb4b4e40b67473f42e67e3f02f8e012e1b1eb50 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -232,7 +232,7 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor { } }; -// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < lambda; 0 +// softshrink(x) = x - lambda, if x > lambda; x + lambda, if x < -lambda; 0 // otherwise template struct SoftShrinkFunctor : public BaseActivationFunctor {