From a815d6abcf49d4778d0a49c852c45264bd8a684a Mon Sep 17 00:00:00 2001
From: zhouxiao-coder
Date: Fri, 29 Sep 2017 17:29:52 +0800
Subject: [PATCH] elu: Optimize gradient calculation; Add more comments

---
 paddle/operators/activation_op.cc                  | 25 ++++++++++++
 paddle/operators/activation_op.cu                  |  4 ++
 paddle/operators/activation_op.h                   | 40 +++++++++++++++++++
 .../v2/framework/tests/test_activation_op.py       | 20 ++++++++++
 4 files changed, 89 insertions(+)

diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index 1e1d3cf7f76..e83666c9f92 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -174,6 +174,25 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };
 
+template <typename AttrType>
+class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "Input of ELU operator; it should not be empty. The input is "
+             "flattened and treated as a 1D array.");
+    AddOutput("Y", "Output of ELU operator, with the same shape as the input.");
+    AddComment(
+        "ELU activation operator. It applies this element-wise computation on "
+        "the input: f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)). "
+        "See https://arxiv.org/abs/1511.07289 for more details.");
+    AddAttr<AttrType>("alpha",
+                      "The alpha value of the ELU formulation, defaults to 1.")
+        .SetDefault(static_cast<AttrType>(1.));
+  }
+};
+
 template <typename AttrType>
 class PowOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
@@ -311,6 +330,12 @@ REGISTER_OP_CPU_KERNEL(soft_relu,
 REGISTER_OP_CPU_KERNEL(
     soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::CPUPlace, float>);
 
+REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker<float>, elu_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(elu, ops::ELUKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(elu_grad,
+                       ops::ELUGradKernel<paddle::platform::CPUPlace, float>);
+
 REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
             ops::ActivationOpGrad);
 REGISTER_OP_CPU_KERNEL(pow, ops::PowKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/activation_op.cu b/paddle/operators/activation_op.cu
index 56886d8b1b9..48800b11ec5 100644
--- a/paddle/operators/activation_op.cu
+++ b/paddle/operators/activation_op.cu
@@ -97,6 +97,10 @@ REGISTER_OP_GPU_KERNEL(soft_relu,
 REGISTER_OP_GPU_KERNEL(
     soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::GPUPlace, float>);
 
+REGISTER_OP_GPU_KERNEL(elu, ops::ELUKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(elu_grad,
+                       ops::ELUGradKernel<paddle::platform::GPUPlace, float>);
+
 REGISTER_OP_GPU_KERNEL(pow, ops::PowKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(pow_grad,
                        ops::PowGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index b9f52e1af39..3428aca8174 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -296,6 +296,46 @@ class SoftReluGradKernel : public framework::OpKernel {
   }
 };
 
+template <typename Place, typename T>
+class ELUKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* X = context.Input<framework::Tensor>("X");
+    auto* Y = context.Output<framework::Tensor>("Y");
+    auto alpha = static_cast<T>(context.Attr<float>("alpha"));
+    Y->mutable_data<T>(context.GetPlace());
+
+    auto x = framework::EigenVector<T>::Flatten(*X);
+    auto y = framework::EigenVector<T>::Flatten(*Y);
+    auto place = context.GetEigenDevice<Place>();
+    y.device(place) =
+        x.cwiseMax(static_cast<T>(0)) +
+        (alpha * (x.exp() - static_cast<T>(1))).cwiseMin(static_cast<T>(0));
+  }
+};
+
+template <typename Place, typename T>
+class ELUGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* X = context.Input<framework::Tensor>("X");
+    auto* Y = context.Input<framework::Tensor>("Y");
+    auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto alpha = static_cast<T>(context.Attr<float>("alpha"));
+    dX->mutable_data<T>(context.GetPlace());
+
+    auto x = framework::EigenVector<T>::Flatten(*X);
+    auto y = framework::EigenVector<T>::Flatten(*Y);
+    auto dy = framework::EigenVector<T>::Flatten(*dY);
+    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto place = context.GetEigenDevice<Place>();
+    dx.device(place) =
+        dy * (x > static_cast<T>(0)).template cast<T>() +
+        dy * (y + alpha) * (x < static_cast<T>(0)).template cast<T>();
+  }
+};
+
 template <typename Place, typename T>
 class PowKernel : public framework::OpKernel {
  public:
diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py
index c44eb849063..9ea01d43c55 100644
--- a/python/paddle/v2/framework/tests/test_activation_op.py
+++ b/python/paddle/v2/framework/tests/test_activation_op.py
@@ -144,6 +144,26 @@ class TestSoftRelu(OpTest):
         self.check_grad(['X'], 'Y', max_relative_error=0.02)
 
 
+class TestELU(OpTest):
+    def setUp(self):
+        self.op_type = "elu"
+        x = np.random.uniform(-3, 3, [4, 4]).astype("float32")
+        alpha = 1.
+        # Note: unlike other ReLU extensions, the standard ELU (i.e. alpha = 1) is
+        # differentiable at 0, so we can skip tweaks like x[np.abs(x) < 0.005] = 0.02 here.
+        self.inputs = {'X': x}
+        self.attrs = {'alpha': alpha}
+        self.outputs = {
+            'Y': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1))
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.02)
+
+
 class TestReciprocal(OpTest):
     def setUp(self):
         self.op_type = "reciprocal"
-- 
GitLab
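Reviewer note: for cross-checking the math in this patch, here is a minimal NumPy sketch of the forward and backward computation the new ELU kernels implement: f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)), and the gradient passes dy through unchanged where x > 0 and scales it by y + alpha = alpha * exp(x) where x < 0 (the "optimized" form that reuses the forward output y). The helper names elu_forward and elu_backward are illustrative only and are not part of the patch.

import numpy as np

def elu_forward(x, alpha=1.0):
    # f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)), as in the op comment.
    return np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1))

def elu_backward(x, y, dy, alpha=1.0):
    # Matches ELUGradKernel: dy for x > 0, dy * (y + alpha) == dy * alpha * exp(x) for x < 0.
    return dy * (x > 0) + dy * (y + alpha) * (x < 0)

if __name__ == "__main__":
    x = np.random.uniform(-3, 3, [4, 4]).astype("float32")
    y = elu_forward(x)
    dx = elu_backward(x, y, np.ones_like(x))
    # Compare against a central finite-difference estimate of the gradient.
    eps = 1e-3
    num = (elu_forward(x + eps) - elu_forward(x - eps)) / (2 * eps)
    print("max abs diff:", np.max(np.abs(dx - num)))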