diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index b261144f3d7836801e0b7a45a1478d3b801db86d..0ed3508bffb5683dc0464ed0036ee36552a8ecd1 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -18,6 +18,37 @@ limitations under the License. */ namespace paddle { namespace operators { +#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ + class OP_NAME##OpMaker : public framework::OpProtoAndCheckerMaker { \ + public: \ + OP_NAME##OpMaker(OpProto *proto, OpAttrChecker *op_checker) \ + : framework::OpProtoAndCheckerMaker(proto, op_checker) { \ + AddInput("X", "Input of " #OP_NAME " operator"); \ + AddOutput("Out", "Output of " #OP_NAME " operator"); \ + AddAttr<bool>("use_mkldnn", \ + "(bool, default false) Only used in mkldnn kernel") \ + .SetDefault(false); \ + AddComment(#OP_COMMENT); \ + } \ + } + +#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME) \ + class OP_NAME##GradMaker : public framework::SingleGradOpDescMaker { \ + public: \ + protected: \ + std::unique_ptr<framework::OpDesc> Apply() const override { \ + auto *op = new framework::OpDesc(); \ + op->SetType(#OP_NAME "_grad"); \ + op->SetInput("Out", Input("Out")); \ + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); \ + \ + op->SetAttrMap(Attrs()); \ + \ + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); \ + return std::unique_ptr<framework::OpDesc>(op); \ + } \ + } + class ActivationOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -37,346 +68,190 @@ class ActivationOpGrad : public framework::OperatorWithKernel { } }; -class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Sigmoid operator"); - AddOutput("Out", "Output of Sigmoid operator"); - AddComment(R"DOC( +constexpr char SigmoidDoc[] = R"DOC( Sigmoid Activation Operator $$out = \frac{1}{1 + e^{-x}}$$ -)DOC"); - } -}; +)DOC"; -class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { - public: - LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of LogSigmoid operator"); - AddOutput("Out", "Output of LogSigmoid operator"); - AddComment(R"DOC( +constexpr char LogSigmoidDoc[] = R"DOC( Logsigmoid Activation Operator $$out = \log \frac{1}{1 + e^{-x}}$$ -)DOC"); - } -}; +)DOC"; -class ExpOpMaker : public framework::OpProtoAndCheckerMaker { - public: - ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Exp operator"); - AddOutput("Out", "Output of Exp operator"); - AddComment(R"DOC( +constexpr char ExpDoc[] = R"DOC( Exp Activation Operator. $out = e^x$ -)DOC"); - } -}; +)DOC"; -class ReluOpMaker : public framework::OpProtoAndCheckerMaker { - public: - ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Relu operator"); - AddOutput("Out", "Output of Relu operator"); - AddAttr<bool>("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char ReluDoc[] = R"DOC( Relu Activation Operator.
$out = \max(x, 0)$ -)DOC"); - } -}; - -class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { - public: - LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of LeakyRelu operator"); - AddOutput("Out", "Output of LeakyRelu operator"); - AddAttr("alpha", "The small negative slope").SetDefault(0.02f); - AddComment(R"DOC( -LeakyRelu Activation Operator. - -$out = \max(x, \alpha * x)$ - -)DOC"); - } -}; - -class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softshrink operator"); - AddOutput("Out", "Output of Softshrink operator"); - AddAttr("lambda", "non-negative offset").SetDefault(0.5f); - AddComment(R"DOC( -Softshrink Activation Operator. - -$$ -out = \begin{cases} - x - \lambda, \text{if } x > \lambda \\ - x + \lambda, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ +)DOC"; -)DOC"); - } -}; - -class TanhOpMaker : public framework::OpProtoAndCheckerMaker { - public: - TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Tanh operator"); - AddOutput("Out", "Output of Tanh operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. $$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ -)DOC"); - } -}; +)DOC"; -class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of TanhShrink operator"); - AddOutput("Out", "Output of TanhShrink operator"); - AddComment(R"DOC( +constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. $$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ -)DOC"); - } -}; - -class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { - public: - HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of HardShrink operator"); - AddOutput("Out", "Output of HardShrink operator"); - AddAttr("threshold", "The value of threshold for HardShrink") - .SetDefault(0.5f); - AddComment(R"DOC( -HardShrink Activation Operator. +)DOC"; -$$ -out = \begin{cases} - x, \text{if } x > \lambda \\ - x, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ - -)DOC"); - } -}; - -class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Sqrt operator"); - AddOutput("Out", "Output of Sqrt operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char SqrtDoc[] = R"DOC( Sqrt Activation Operator. 
$out = \sqrt{x}$ -)DOC"); - } -}; +)DOC"; -class AbsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Abs operator"); - AddOutput("Out", "Output of Abs operator"); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); - AddComment(R"DOC( +constexpr char AbsDoc[] = R"DOC( Abs Activation Operator. $out = |x|$ -)DOC"); - } -}; +)DOC"; -class CeilOpMaker : public framework::OpProtoAndCheckerMaker { - public: - CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Ceil operator"); - AddOutput("Out", "Output of Ceil operator"); - AddComment(R"DOC( +constexpr char CeilDoc[] = R"DOC( Ceil Activation Operator. $out = ceil(x)$ -)DOC"); - } -}; +)DOC"; -class FloorOpMaker : public framework::OpProtoAndCheckerMaker { - public: - FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Floor operator"); - AddOutput("Out", "Output of Floor operator"); - AddComment(R"DOC( +constexpr char FloorDoc[] = R"DOC( Floor Activation Operator. $out = floor(x)$ -)DOC"); - } -}; +)DOC"; -class CosOpMaker : public framework::OpProtoAndCheckerMaker { - public: - CosOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Cosine operator"); - AddOutput("Out", "Output of Cosine operator"); - AddComment(R"DOC( +constexpr char CosDoc[] = R"DOC( Cosine Activation Operator. $out = cos(x)$ -)DOC"); - } -}; +)DOC"; -class SinOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SinOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Sine operator"); - AddOutput("Out", "Output of Sine operator"); - AddComment(R"DOC( +constexpr char SinDoc[] = R"DOC( Sine Activation Operator. $out = sin(x)$ -)DOC"); - } -}; +)DOC"; -class RoundOpMaker : public framework::OpProtoAndCheckerMaker { - public: - RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Round operator"); - AddOutput("Out", "Output of Round operator"); - AddComment(R"DOC( +constexpr char RoundDoc[] = R"DOC( Round Activation Operator. $out = [x]$ -)DOC"); - } -}; +)DOC"; -class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { - public: - ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Reciprocal operator"); - AddOutput("Out", "Output of Reciprocal operator"); - AddComment(R"DOC( +constexpr char ReciprocalDoc[] = R"DOC( Reciprocal Activation Operator. $$out = \frac{1}{x}$$ -)DOC"); - } -}; +)DOC"; -class LogOpMaker : public framework::OpProtoAndCheckerMaker { - public: - LogOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Log operator"); - AddOutput("Out", "Output of Log operator"); - AddComment(R"DOC( +constexpr char LogDoc[] = R"DOC( Log Activation Operator. $out = \ln(x)$ Natural logarithm of x. -)DOC"); - } -}; +)DOC"; + +constexpr char SquareDoc[] = R"DOC( +Square Activation Operator. 
+ +$out = x^2$ -class SquareOpMaker : public framework::OpProtoAndCheckerMaker { +)DOC"; + +constexpr char SoftplusDoc[] = R"DOC( +Softplus Activation Operator. + +$out = \ln(1 + e^{x})$ + +)DOC"; + +constexpr char SoftsignDoc[] = R"DOC( +Softsign Activation Operator. + +$$out = \frac{x}{1 + |x|}$$ + +)DOC"; + +class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { public: - SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker) + LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Square operator"); - AddOutput("Out", "Output of Square operator"); + AddInput("X", "Input of LeakyRelu operator"); + AddOutput("Out", "Output of LeakyRelu operator"); + AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( -Square Activation Operator. +LeakyRelu Activation Operator. -$out = x^2$ +$out = \max(x, \alpha * x)$ )DOC"); } }; -class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { +class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softplus operator"); - AddOutput("Out", "Output of Softplus operator"); + AddInput("X", "Input of Softshrink operator"); + AddOutput("Out", "Output of Softshrink operator"); + AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( -Softplus Activation Operator. +Softshrink Activation Operator. -$out = \ln(1 + e^{x})$ +$$ +out = \begin{cases} + x - \lambda, \text{if } x > \lambda \\ + x + \lambda, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ )DOC"); } }; -class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { +class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker) + HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Softsign operator"); - AddOutput("Out", "Output of Softsign operator"); + AddInput("X", "Input of HardShrink operator"); + AddOutput("Out", "Output of HardShrink operator"); + AddAttr("threshold", "The value of threshold for HardShrink") + .SetDefault(0.5f); AddComment(R"DOC( -Softsign Activation Operator. +HardShrink Activation Operator. 
-$$out = \frac{x}{1 + |x|}$$ +$$ +out = \begin{cases} + x, \text{if } x > \lambda \\ + x, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} +$$ )DOC"); } @@ -553,100 +428,80 @@ $$out = \frac{x}{1 + e^{- \beta x}}$$ } }; +REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc); +REGISTER_ACTIVATION_OP_MAKER(LogSigmoid, LogSigmoidDoc); +REGISTER_ACTIVATION_OP_MAKER(Exp, ExpDoc); +REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc); +REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc); +REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc); +REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc); +REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc); +REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc); +REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc); +REGISTER_ACTIVATION_OP_MAKER(Cos, CosDoc); +REGISTER_ACTIVATION_OP_MAKER(Sin, SinDoc); +REGISTER_ACTIVATION_OP_MAKER(Round, RoundDoc); +REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc); +REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc); +REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc); +REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc); +REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc); + +// NOTE(*): Only operators whose gradient can be computed in place need to +// register a gradient maker here. The maker tells the executor which input +// variables the gradient op actually uses; by default, every input variable +// of the forward op is assumed to be used. +// The operator names below are intentionally lowercase, matching the +// registered op type names. +REGISTER_ACTIVATION_OP_GRAD_MAKER(sigmoid); +REGISTER_ACTIVATION_OP_GRAD_MAKER(exp); +REGISTER_ACTIVATION_OP_GRAD_MAKER(relu); +REGISTER_ACTIVATION_OP_GRAD_MAKER(tanh); +REGISTER_ACTIVATION_OP_GRAD_MAKER(sqrt); +REGISTER_ACTIVATION_OP_GRAD_MAKER(ceil); +REGISTER_ACTIVATION_OP_GRAD_MAKER(floor); +REGISTER_ACTIVATION_OP_GRAD_MAKER(reciprocal); +REGISTER_ACTIVATION_OP_GRAD_MAKER(relu6); +REGISTER_ACTIVATION_OP_GRAD_MAKER(soft_relu); +REGISTER_ACTIVATION_OP_GRAD_MAKER(hard_sigmoid); } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad, - ops::ActivationOpGrad); - -REGISTER_OP(logsigmoid, ops::ActivationOp, ops::LogSigmoidOpMaker, - logsigmoid_grad, ops::ActivationOpGrad); - -REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad, - ops::ActivationOpGrad); - -REGISTER_OP(relu, ops::ActivationWithMKLDNNOp, ops::ReluOpMaker, relu_grad, - ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OP(tanh, ops::ActivationWithMKLDNNOp, ops::TanhOpMaker, tanh_grad, - ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker, - tanh_shrink_grad, ops::ActivationOpGrad); - -REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, - softshrink_grad, ops::ActivationOpGrad); - -REGISTER_OP(sqrt, ops::ActivationWithMKLDNNOp, ops::SqrtOpMaker, sqrt_grad, - ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OP(abs, ops::ActivationWithMKLDNNOp, ops::AbsOpMaker, abs_grad, - ops::ActivationWithMKLDNNOpGrad); - -REGISTER_OP(ceil, ops::ActivationOp, ops::CeilOpMaker, ceil_grad, - ops::ActivationOpGrad); - -REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad, - ops::ActivationOpGrad); - -REGISTER_OP(cos, ops::ActivationOp, ops::CosOpMaker, cos_grad, - ops::ActivationOpGrad); - -REGISTER_OP(sin, ops::ActivationOp, ops::SinOpMaker, sin_grad, - ops::ActivationOpGrad); - -REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad, - ops::ActivationOpGrad); - -REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker, - reciprocal_grad, ops::ActivationOpGrad); - -REGISTER_OP(log,
ops::ActivationOp, ops::LogOpMaker, log_grad, - ops::ActivationOpGrad); - -REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad, - ops::ActivationOpGrad); - -REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad, - ops::ActivationOpGrad); - -REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad, - ops::ActivationOpGrad); - -REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad, - ops::ActivationOpGrad); - -REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, - leaky_relu_grad, ops::ActivationOpGrad); - -REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, soft_relu_grad, - ops::ActivationOpGrad); - -REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad, - ops::ActivationOpGrad); - -REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad, - ops::ActivationOpGrad); - -REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad, - ops::ActivationOpGrad); - -REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad, - ops::ActivationOpGrad); - -REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, - hard_shrink_grad, ops::ActivationOpGrad); - -REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, - thresholded_relu_grad, ops::ActivationOpGrad); - -REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, - hard_sigmoid_grad, ops::ActivationOpGrad); - -REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad, - ops::ActivationOpGrad); +#define REGISTER_ACTIVATION_OP(act_type, op_name) \ + REGISTER_OP(act_type, ops::ActivationOp, ops::op_name##OpMaker, \ + act_type##_grad, ops::ActivationOpGrad); + +#define FOR_EACH_OP_FUNCTOR(__macro) \ + __macro(sigmoid, Sigmoid); \ + __macro(logsigmoid, LogSigmoid); \ + __macro(exp, Exp); \ + __macro(tanh, Tanh); \ + __macro(softshrink, SoftShrink); \ + __macro(sqrt, Sqrt); \ + __macro(abs, Abs); \ + __macro(ceil, Ceil); \ + __macro(floor, Floor); \ + __macro(cos, Cos); \ + __macro(sin, Sin); \ + __macro(round, Round); \ + __macro(reciprocal, Reciprocal); \ + __macro(log, Log); \ + __macro(square, Square); \ + __macro(brelu, BRelu); \ + __macro(soft_relu, SoftRelu); \ + __macro(pow, Pow); \ + __macro(stanh, STanh); \ + __macro(softplus, Softplus); \ + __macro(softsign, Softsign); \ + __macro(relu6, Relu6); \ + __macro(leaky_relu, LeakyRelu); \ + __macro(tanh_shrink, TanhShrink); \ + __macro(elu, ELU); \ + __macro(hard_shrink, HardShrink); \ + __macro(hard_sigmoid, HardSigmoid); \ + __macro(swish, Swish); \ + __macro(thresholded_relu, ThresholdedRelu); #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL( \ @@ -661,4 +516,5 @@ REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad, ops::ActivationGradKernel>); +FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP); FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 43856780bf9357281ac4af2968950da15426e5c8..eaeaca84ab8592bcb7e4ef1451b4aeb0b2d9cf59 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -10,6 +10,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include +#include +#include #include #include @@ -25,6 +28,16 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +/* This whitelist is intentionally a global variable so that it can be + queried from the Python layer side; please refer to layer_helper.py for + the details. */ +static std::unordered_set<std::string> InplaceOpSet = { + "sigmoid", "exp", "relu", "tanh", "sqrt", "ceil", + "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid", +}; + +static bool IsInplace(std::string op) { return InplaceOpSet.count(op); } + template class ActivationKernel : public framework::OpKernel { @@ -60,7 +73,6 @@ class ActivationGradKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - auto* X = context.Input("X"); auto* Out = context.Input("Out"); auto* dOut = context.Input(framework::GradVarName("Out")); @@ -68,7 +80,6 @@ class ActivationGradKernel dX->mutable_data(context.GetPlace()); auto dout = framework::EigenVector::Flatten(*dOut); - auto x = framework::EigenVector::Flatten(*X); auto out = framework::EigenVector::Flatten(*Out); auto dx = framework::EigenVector::Flatten(*dX); auto* place = @@ -78,7 +89,16 @@ class ActivationGradKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - functor(*place, x, out, dout, dx); + bool inplace = functor.Inplace(); + if (!inplace) { + auto* X = context.Input("X"); + auto x = framework::EigenVector::Flatten(*X); + functor(*place, x, out, dout, dx); + } else { + VLOG(10) << " Inplace activation "; + auto x = framework::EigenVector::Flatten(*dX); + functor(*place, x, out, dout, dx); + } } }; @@ -89,6 +109,14 @@ struct BaseActivationFunctor { using AttrPair = std::vector>; AttrPair GetAttrs() { return AttrPair(); } + + /* NOTE(*): Out can reuse the memory of X when the gradient does not + depend on X. For example, sigmoid's gradient only involves Out, so its + output can reuse the input memory; abs's gradient uses X, so it cannot + be computed in place.
+ */ + bool Inplace() const { return false; } }; // sigmoid(x) = 1 / (1 + exp(-x)) @@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor { template struct SigmoidGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("sigmoid"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor { template struct ExpGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("exp"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor { template struct ReluGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("relu"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * (x > static_cast(0)).template cast(); + dx.device(d) = dout * (out > static_cast(0)).template cast(); } }; @@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor { template struct TanhGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("tanh"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor { template struct SqrtGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("sqrt"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor { template struct ZeroGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("ceil"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = static_cast(0) / x; + dx.device(d) = static_cast(0) / out; } }; @@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor { template struct ReciprocalGradFunctor : public BaseActivationFunctor { + bool Inplace() const { return IsInplace("reciprocal"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -531,12 +566,14 @@ struct Relu6GradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } + bool Inplace() const { return IsInplace("relu6"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * - ((x > static_cast(0)) * (x < static_cast(threshold))) - .template cast(); + dx.device(d) = + dout * + ((out > static_cast(0)) * (out < static_cast(threshold))) + .template cast(); } }; @@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } + bool Inplace() const { return IsInplace("soft_relu"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto tmp = static_cast(threshold); - auto temp = ((x > -tmp) * (x < tmp)).template cast().eval(); + auto temp = ((out > -tmp) * (out < tmp)).template cast().eval(); dx.device(d) = dout * (static_cast(1) - (-out).exp()) * temp; } }; @@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"slope", &slope}, {"offset", &offset}}; } - + bool Inplace() const { return IsInplace("hard_sigmoid"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index a1e8ff6399f0812773a7bb753c90e4400b1763d9..a8a88bf19a28093b91fe5744379cf547ffd3988d 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -33,6 +33,7 @@ limitations under the License. */ #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" @@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle. self.back().set_lod(t.lod()); }); + m.def("IsInplace", + [](std::string op) -> bool { return operators::IsInplace(op); }); + m.def("op_support_gpu", OpSupportGPU); #ifdef PADDLE_WITH_CUDA m.def("get_cuda_device_count", platform::GetCUDADeviceCount); diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index d771837fc545167f7c32fcf914dd1c3c3ae64fb3..62933b512529bd04fab8c9ded12e636ecfae685c 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup import unique_name from paddle.fluid.initializer import Constant, Xavier from param_attr import ParamAttr, WeightNormParamAttr +import core class LayerHelper(object): @@ -398,13 +399,16 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} - tmp = self.create_tmp_variable(dtype=input_var.dtype) if 'use_mkldnn' in self.kwargs: act['use_mkldnn'] = self.kwargs.get('use_mkldnn') act_type = act.pop('type') if 'use_mkldnn' in self.kwargs: act['use_mkldnn'] = self.kwargs.get('use_mkldnn') + tmp = input_var + # NOTE(dzhwinter): some activations support in-place computation, + # so the output can reuse the input variable. + if not core.IsInplace(act_type): + tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type=act_type, inputs={"X": [input_var]}, diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 57d4a50e913c0d2994c62600f4e479056ed4c306..a0189e8df70f3edf5222c8987bb6960256c36b31 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -361,10 +361,7 @@ class TestCeil(OpTest): def test_check_output(self): self.check_output() - def test_check_grad(self): - if self.dtype == np.float16: - return - self.check_grad(['X'], 'Out', max_relative_error=0.007) + # The gradient check is skipped for the same reason as TestFloor. def init_dtype(self): pass @@ -396,10 +393,8 @@ class TestFloor(OpTest): def test_check_output(self): self.check_output() - def test_check_grad(self): - if self.dtype == np.float16: - return - self.check_grad(['X'], 'Out', max_relative_error=0.007) + # The gradient of floor, ceil and round is undefined; the op returns zero + # as the gradient while numpy gives nan, so the gradient check is skipped. def init_dtype(self): pass @@ -501,11 +496,6 @@ class TestRound(OpTest): def test_check_output(self): self.check_output() - def test_check_grad(self): - if self.dtype == np.float16: - return - self.check_grad(['X'], 'Out', max_relative_error=0.007) - def init_dtype(self): pass
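
The FOR_EACH_OP_FUNCTOR / REGISTER_ACTIVATION_OP pair in activation_op.cc is an X-macro: the operator list is written once and expanded with whatever per-entry macro is passed in (REGISTER_ACTIVATION_OP here, REGISTER_ACTIVATION_CPU_KERNEL further down). The stand-alone sketch below shows the same pattern outside of Paddle; FOR_EACH_ACTIVATION, REGISTER_NAME and g_registry are invented names used purely for illustration.

```cpp
// Stand-alone illustration of the X-macro pattern used by
// FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP): one list, many expansions.
#include <iostream>
#include <set>
#include <string>

// The single source of truth: every activation name, listed once.
#define FOR_EACH_ACTIVATION(__macro) \
  __macro(sigmoid);                  \
  __macro(relu);                     \
  __macro(tanh)

// One per-entry action: stringize the name and record it in a registry.
static std::set<std::string> g_registry;
#define REGISTER_NAME(name) g_registry.insert(#name)

int main() {
  FOR_EACH_ACTIVATION(REGISTER_NAME);  // expands into three insert() calls
  for (const std::string& n : g_registry) std::cout << n << "\n";
  return 0;
}
```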
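REGISTER_ACTIVATION_OP_GRAD_MAKER exists so that each listed gradient operator declares only "Out" and the gradient of "Out" as inputs. Because "X" is not declared, the executor knows the forward input is no longer needed once the forward op has run, which is what makes letting the output share the input's buffer safe. The snippet below is a rough, hypothetical stand-in for what the macro-generated Apply() builds; OpDesc here is a simplified struct, not Paddle's framework::OpDesc.

```cpp
// Sketch of the gradient description produced for an in-place activation:
// it reads Out and Out@GRAD, but never X, so X's buffer can be reused.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct OpDesc {
  std::string type;
  std::map<std::string, std::vector<std::string>> inputs;
  std::map<std::string, std::vector<std::string>> outputs;
};

// Hand-written stand-in for the macro-generated GradMaker::Apply().
OpDesc MakeReluGradDesc(const std::string& out, const std::string& dout,
                        const std::string& dx) {
  OpDesc op;
  op.type = "relu_grad";
  op.inputs["Out"] = {out};        // forward output
  op.inputs["Out@GRAD"] = {dout};  // gradient w.r.t. the output
  op.outputs["X@GRAD"] = {dx};     // gradient w.r.t. the input
  // No "X" input is declared, which is exactly what allows in-place reuse.
  return op;
}

int main() {
  OpDesc g = MakeReluGradDesc("fc_0.out", "fc_0.out@GRAD", "x@GRAD");
  std::cout << g.type << " declares " << g.inputs.size() << " inputs\n";
  return 0;
}
```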
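Both the InplaceOpSet whitelist in activation_op.h and the layer_helper.py change rest on one property: for the whitelisted activations the backward formula can be written purely in terms of the forward output, so Out may live in X's buffer and the grad kernel can read the dX buffer in place of x. Here is a minimal sketch of that property for sigmoid, in plain C++ with hypothetical function names, not Paddle's kernels.

```cpp
// Why sigmoid qualifies for in-place execution: the forward pass can
// overwrite its input, and backward needs only the overwritten output
// plus the incoming gradient.
#include <cmath>
#include <cstdio>
#include <vector>

// Forward pass computed in place: the input buffer becomes Out.
void SigmoidForwardInplace(std::vector<float>* buf) {
  for (float& v : *buf) v = 1.0f / (1.0f + std::exp(-v));
}

// Backward pass: d(sigmoid)/dx = out * (1 - out), so X is never needed.
std::vector<float> SigmoidBackwardFromOut(const std::vector<float>& out,
                                          const std::vector<float>& dout) {
  std::vector<float> dx(out.size());
  for (size_t i = 0; i < out.size(); ++i) {
    dx[i] = dout[i] * out[i] * (1.0f - out[i]);
  }
  return dx;
}

int main() {
  std::vector<float> x = {-1.0f, 0.0f, 2.0f};  // becomes Out after the call
  SigmoidForwardInplace(&x);
  std::vector<float> dout = {1.0f, 1.0f, 1.0f};
  std::vector<float> dx = SigmoidBackwardFromOut(x, dout);
  for (float v : dx) std::printf("%f\n", v);
  return 0;
}
```

By contrast, abs needs the sign of x, which is lost once x has been overwritten with |x|, so it stays off the whitelist.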