diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index 61a201b6cd7198575c22ea506c72e9233057a852..ced14a8923140ec6b08e3e6725a5780b61033daf 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -206,6 +206,17 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };
 
+class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SoftplusOpMaker(framework::OpProto *proto,
+                  framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "Input of Softplus operator");
+    AddOutput("Y", "Output of Softplus operator");
+    AddComment("Softplus activation operator, softplus(x) = log(1 + exp(x))");
+  }
+};
+
 class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SoftsignOpMaker(framework::OpProto *proto,
@@ -351,6 +362,9 @@ REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
 REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
             ops::ActivationOpGrad);
 
+REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad,
+            ops::ActivationOpGrad);
+
 REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
             ops::ActivationOpGrad);
 
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index 29f159bbae1dca4da664c46a4af55823d9d6297b..f88c9c48eb9fcb779de5a99a45a832e582d76ab0 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -407,6 +407,33 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
   }
 };
 
+// softplus(x) = log(1 + exp(x))
+// When x is a very large positive number, exp(x) may explode to inf,
+// so we use the trick below for numerical stability:
+// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
+// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
+template <typename T>
+struct SoftplusFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Y>
+  void operator()(Device d, X x, Y y) {
+    auto temp = x.cwiseMax(static_cast<T>(0));  // temp = max(x, 0)
+    y.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log());
+  }
+};
+
+// d(softplus(x))/dx = exp(x) / (1 + exp(x))
+// For numerical stability:
+// d(softplus(x))/dx = exp(x - max(x, 0)) /
+//                     (exp(-max(x, 0)) + exp(x - max(x, 0)))
+template <typename T>
+struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Y, typename dY, typename dX>
+  void operator()(Device d, X x, Y y, dY dy, dX dx) {
+    auto temp = x.cwiseMax(static_cast<T>(0));  // temp = max(x, 0)
+    dx.device(d) =
+        dy * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp()));
+  }
+};
+
 // softsign(x) = x / (1 + |x|)
 template <typename T>
 struct SoftsignFunctor : public BaseActivationFunctor<T> {
@@ -582,6 +609,7 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
   __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor);       \
   __macro(pow, PowFunctor, PowGradFunctor);                       \
   __macro(stanh, STanhFunctor, STanhGradFunctor);                 \
+  __macro(softplus, SoftplusFunctor, SoftplusGradFunctor);        \
   __macro(softsign, SoftsignFunctor, SoftsignGradFunctor);        \
   __macro(relu6, Relu6Functor, Relu6GradFunctor);                 \
   __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor);    \
diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py
index 52e027bd54a03d263d473cd87d3e66f5dd34e812..a28c4431e1ae9230750247c0ed16c9aff37364fa 100644
--- a/python/paddle/v2/framework/tests/test_activation_op.py
+++ b/python/paddle/v2/framework/tests/test_activation_op.py
@@ -331,6 +331,21 @@ class TestSTanh(OpTest):
         self.check_grad(['X'], 'Y', max_relative_error=0.007)
 
 
+class TestSoftplus(OpTest):
+    def setUp(self):
+        self.op_type = "softplus"
+        self.inputs = {
+            'X': np.random.uniform(-1, 1, [11, 17]).astype("float32")
+        }
+        self.outputs = {'Y': np.log(1 + np.exp(self.inputs['X']))}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.007)
+
+
 class TestSoftsign(OpTest):
     def setUp(self):
         self.op_type = "softsign"
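
Sanity check (not part of the patch): the log-sum-exp trick used by SoftplusFunctor and SoftplusGradFunctor can be verified against the naive formulas with a short NumPy sketch. The helper names below are illustrative only, not APIs from this repository.

import numpy as np

def stable_softplus(x):
    # softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
    t = np.maximum(x, 0.0)
    return t + np.log(np.exp(-t) + np.exp(x - t))

def stable_softplus_grad(x):
    # d softplus / dx = exp(x - max(x, 0)) / (exp(-max(x, 0)) + exp(x - max(x, 0)))
    t = np.maximum(x, 0.0)
    return np.exp(x - t) / (np.exp(-t) + np.exp(x - t))

x = np.array([-50.0, -1.0, 0.0, 1.0, 50.0])
# Matches the naive softplus on a range where exp(x) does not overflow.
assert np.allclose(stable_softplus(x), np.log(1.0 + np.exp(x)))
# The gradient equals sigmoid(x) = 1 / (1 + exp(-x)).
assert np.allclose(stable_softplus_grad(x), 1.0 / (1.0 + np.exp(-x)))
# Stays finite where the naive form log(1 + exp(1000)) overflows to inf.
print(stable_softplus(np.array([1000.0])))  # [1000.]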