diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 83262f950e1606cc367ea30b089a983db9848988..7f3118f17603d03903b072b248e033cbd3347623 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -506,6 +506,22 @@ It is recommended to use the defaults for this activation. } }; +class SwishOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SwishOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Swish operator"); + AddOutput("Y", "Output of Swish operator"); + AddAttr("beta", "Constant beta of swish operator").SetDefault(1.0f); + AddComment(R"DOC( +Swish Activation Operator. + +$$y = \frac{x}{1 + e^{- \beta x}}$$ + +)DOC"); + } +}; + } // namespace operators } // namespace paddle @@ -592,6 +608,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, hard_sigmoid_grad, ops::ActivationOpGrad); +REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad, + ops::ActivationOpGrad); + #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL( \ act_type, \ diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 8cd3bfbbd3f8f3210f94aef3a1586c8295730c1d..ac0e0a3b01232940a18ff8b1ce6b6504df1299d1 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -700,6 +700,35 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { } }; +template +struct SwishFunctor : public BaseActivationFunctor { + float beta; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}}; + } + + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x / (static_cast(1) + (static_cast(-beta) * x).exp()); + } +}; + +template +struct SwishGradFunctor : public BaseActivationFunctor { + float beta; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}}; + } + + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + auto temp1 = static_cast(1) / + (static_cast(1) + (static_cast(-beta) * x).exp()); + auto temp2 = temp1 * (static_cast(1) - (beta * y)); + dx.device(d) = dy * ((beta * y) + temp2); + } +}; + } // namespace operators } // namespace paddle @@ -730,4 +759,5 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { __macro(elu, ELUFunctor, ELUGradFunctor); \ __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ __macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \ + __macro(swish, SwishFunctor, SwishGradFunctor); \ __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py index bd52bef2605874d26e880fb09e589891fc1934d5..b052374dc7ec3c5684d6adfda6b9d000c5e19fe0 100644 --- a/python/paddle/v2/fluid/tests/test_activation_op.py +++ b/python/paddle/v2/fluid/tests/test_activation_op.py @@ -1,6 +1,7 @@ import unittest import numpy as np from op_test import OpTest +from scipy.special import expit class TestExp(OpTest): @@ -455,5 +456,20 @@ class TestHardSigmoid(OpTest): self.check_grad(['X'], 'Y', max_relative_error=0.002) +class TestSwish(OpTest): + def setUp(self): + self.op_type = "swish" + X = np.random.uniform(0.1, 1, [11, 17]).astype("float32") + self.inputs = {'X': X} + self.attrs = {'beta': 2.3} + self.outputs = {'Y': X * expit(self.attrs['beta'] * X)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.008) + + if __name__ == "__main__": unittest.main()