From a815d6abcf49d4778d0a49c852c45264bd8a684a Mon Sep 17 00:00:00 2001
From: zhouxiao-coder
Date: Fri, 29 Sep 2017 17:29:52 +0800
Subject: [PATCH] elu: Optimize gradient calculation; Add more comments

---
 paddle/operators/activation_op.cc                  | 25 ++++++++++++
 paddle/operators/activation_op.cu                  |  4 ++
 paddle/operators/activation_op.h                   | 40 +++++++++++++++++++
 .../v2/framework/tests/test_activation_op.py       | 20 ++++++++++
 4 files changed, 89 insertions(+)

diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index 1e1d3cf7f76..e83666c9f92 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -174,6 +174,25 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };
 
+template <typename AttrType>
+class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "Input of ELU operator; it should not be empty. The input is "
+             "flattened and treated as a 1D array.");
+    AddOutput("Y", "Output of ELU operator, with the same shape as the input.");
+    AddComment(
+        "ELU activation operator. It applies this element-wise computation on "
+        "the input: f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)). "
+        "See https://arxiv.org/abs/1511.07289 for more details.");
+    AddAttr<AttrType>("alpha",
+                      "The alpha value of the ELU formulation, defaults to 1.")
+        .SetDefault(static_cast<AttrType>(1.));
+  }
+};
+
 template <typename AttrType>
 class PowOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
@@ -311,6 +330,12 @@ REGISTER_OP_CPU_KERNEL(soft_relu,
 REGISTER_OP_CPU_KERNEL(
     soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::CPUPlace, float>);
 
+REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker<float>, elu_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(elu, ops::ELUKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(elu_grad,
+                       ops::ELUGradKernel<paddle::platform::CPUPlace, float>);
+
 REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
             ops::ActivationOpGrad);
 REGISTER_OP_CPU_KERNEL(pow, ops::PowKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/activation_op.cu b/paddle/operators/activation_op.cu
index 56886d8b1b9..48800b11ec5 100644
--- a/paddle/operators/activation_op.cu
+++ b/paddle/operators/activation_op.cu
@@ -97,6 +97,10 @@ REGISTER_OP_GPU_KERNEL(soft_relu,
 REGISTER_OP_GPU_KERNEL(
     soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::GPUPlace, float>);
 
+REGISTER_OP_GPU_KERNEL(elu, ops::ELUKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(elu_grad,
+                       ops::ELUGradKernel<paddle::platform::GPUPlace, float>);
+
 REGISTER_OP_GPU_KERNEL(pow, ops::PowKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(pow_grad,
                        ops::PowGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h
index b9f52e1af39..3428aca8174 100644
--- a/paddle/operators/activation_op.h
+++ b/paddle/operators/activation_op.h
@@ -296,6 +296,46 @@ class SoftReluGradKernel : public framework::OpKernel {
   }
 };
 
+template <typename Place, typename T>
+class ELUKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* X = context.Input<framework::Tensor>("X");
+    auto* Y = context.Output<framework::Tensor>("Y");
+    auto alpha = static_cast<T>(context.Attr<float>("alpha"));
+    Y->mutable_data<T>(context.GetPlace());
+
+    auto x = framework::EigenVector<T>::Flatten(*X);
+    auto y = framework::EigenVector<T>::Flatten(*Y);
+    auto place = context.GetEigenDevice<Place>();
+    y.device(place) =
+        x.cwiseMax(static_cast<T>(0)) +
+        (alpha * (x.exp() - static_cast<T>(1))).cwiseMin(static_cast<T>(0));
+  }
+};
+
+template <typename Place, typename T>
+class ELUGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* X = context.Input<framework::Tensor>("X");
+    auto* Y = context.Input<framework::Tensor>("Y");
+    auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
+    auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto alpha = static_cast<T>(context.Attr<float>("alpha"));
+    dX->mutable_data<T>(context.GetPlace());
+
+    auto x = framework::EigenVector<T>::Flatten(*X);
+    auto y = framework::EigenVector<T>::Flatten(*Y);
+    auto dy = framework::EigenVector<T>::Flatten(*dY);
+    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto place = context.GetEigenDevice<Place>();
+    dx.device(place) =
+        dy * (x > static_cast<T>(0)).template cast<T>() +
+        dy * (y + alpha) * (x < static_cast<T>(0)).template cast<T>();
+  }
+};
+
 template <typename Place, typename T>
 class PowKernel : public framework::OpKernel {
  public:
diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py
index c44eb849063..9ea01d43c55 100644
--- a/python/paddle/v2/framework/tests/test_activation_op.py
+++ b/python/paddle/v2/framework/tests/test_activation_op.py
@@ -144,6 +144,26 @@ class TestSoftRelu(OpTest):
         self.check_grad(['X'], 'Y', max_relative_error=0.02)
 
 
+class TestELU(OpTest):
+    def setUp(self):
+        self.op_type = "elu"
+        x = np.random.uniform(-3, 3, [4, 4]).astype("float32")
+        alpha = 1.
+        # Note: unlike other ReLU extensions, the standard ELU (i.e. alpha = 1) is
+        # differentiable at 0, so we can skip tweaks like x[np.abs(x) < 0.005] = 0.02 here.
+        self.inputs = {'X': x}
+        self.attrs = {'alpha': alpha}
+        self.outputs = {
+            'Y': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1))
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Y', max_relative_error=0.02)
+
+
 class TestReciprocal(OpTest):
     def setUp(self):
         self.op_type = "reciprocal"
-- 
GitLab
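Reviewer note: for cross-checking the math in this patch, here is a minimal NumPy sketch of the forward and backward computation the new ELU kernels implement: f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)), and the gradient passes dy through unchanged where x > 0 and scales it by y + alpha = alpha * exp(x) where x < 0 (the "optimized" form that reuses the forward output y). The helper names elu_forward and elu_backward are illustrative only and are not part of the patch.

import numpy as np

def elu_forward(x, alpha=1.0):
    # f(x) = max(0, x) + min(0, alpha * (exp(x) - 1)), as in the op comment.
    return np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1))

def elu_backward(x, y, dy, alpha=1.0):
    # Matches ELUGradKernel: dy for x > 0, dy * (y + alpha) == dy * alpha * exp(x) for x < 0.
    return dy * (x > 0) + dy * (y + alpha) * (x < 0)

if __name__ == "__main__":
    x = np.random.uniform(-3, 3, [4, 4]).astype("float32")
    y = elu_forward(x)
    dx = elu_backward(x, y, np.ones_like(x))
    # Compare against a central finite-difference estimate of the gradient.
    eps = 1e-3
    num = (elu_forward(x + eps) - elu_forward(x - eps)) / (2 * eps)
    print("max abs diff:", np.max(np.abs(dx - num)))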