From 82773477ae6da1bdeba9f81ded8dd7f76b359f38 Mon Sep 17 00:00:00 2001
From: chengduo
Date: Thu, 15 Nov 2018 19:07:09 +0800
Subject: [PATCH] Add selu (#14415)

* add selu

* use for range
test=develop

* add API
test=develop

* follow comment
test=develop

* update API.spec
test=develop
---
 paddle/fluid/API.spec                         |   1 +
 paddle/fluid/operators/selu_op.cc             | 135 ++++++++++++++++++
 paddle/fluid/operators/selu_op.cu             |  22 +++
 paddle/fluid/operators/selu_op.h              | 124 ++++++++++++++++
 python/paddle/fluid/layers/nn.py              |  42 ++++++
 .../fluid/tests/unittests/test_selu_op.py     |  71 +++++++++
 6 files changed, 395 insertions(+)
 create mode 100644 paddle/fluid/operators/selu_op.cc
 create mode 100644 paddle/fluid/operators/selu_op.cu
 create mode 100644 paddle/fluid/operators/selu_op.h
 create mode 100644 python/paddle/fluid/tests/unittests/test_selu_op.py

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 3378d210cd..da835b3305 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -128,6 +128,7 @@ paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates',
 paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
diff --git a/paddle/fluid/operators/selu_op.cc b/paddle/fluid/operators/selu_op.cc
new file mode 100644
index 0000000000..67fca18000
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.cc
@@ -0,0 +1,135 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/selu_op.h"
+#include <string>
+
+namespace paddle {
+namespace operators {
+
+class SeluOp : public framework::OperatorWithKernel {
+ public:
+  SeluOp(const std::string &type, const framework::VariableNameMap &inputs,
+         const framework::VariableNameMap &outputs,
+         const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SeluOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SeluOp should not be null.");
+
+    ctx->ShareDim("X", /*->*/ "Out");
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::GetDataTypeOfVar(ctx.InputVar("X")), ctx.GetPlace());
+  }
+};
+
+class SeluOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Out"}};
+  }
+};
+
+class SeluOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "The input tensor of selu operator.");
+    AddOutput("Out", "The output tensor of selu operator.");
+    AddAttr<float>("scale",
+                   "(float) the default value is 1.0507~. For more "
+                   "information about this value, please refer to: "
+                   "https://arxiv.org/abs/1706.02515.")
+        .SetDefault(1.0507009873554804934193349852946);
+    AddAttr<float>("alpha",
+                   "(float) the default value is 1.6732~. For more "
+                   "information about this value, please refer to: "
+                   "https://arxiv.org/abs/1706.02515.")
+        .SetDefault(1.6732632423543772848170429916717);
+    AddComment(R"DOC(
+Selu Operator.
+
+The equation is:
+$$
+f(x) = \lambda *
+\begin{cases}
+  \quad \quad x, \quad \quad \quad \text{if} \ x > 0 \\
+  \alpha * e^x - \alpha, \qquad \text{if} \ x \le 0
+\end{cases}
+$$
+
+The input `X` can carry the LoD (Level of Detail) information,
+or not. And the output shares the LoD information with the input `X`.
+)DOC");
+  }
+};
+
+class SeluGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto *grad_op = new framework::OpDesc();
+    grad_op->SetType("selu_grad");
+    grad_op->SetInput("Out", Output("Out"));
+    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    grad_op->SetAttrMap(this->Attrs());
+    return std::unique_ptr<framework::OpDesc>(grad_op);
+  }
+};
+
+class SeluGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input(Out@GRAD) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null");
+    auto x_grad_name = framework::GradVarName("X");
+    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("Out"));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::GetDataTypeOfVar(ctx.InputVar("Out")), ctx.GetPlace());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(selu, ops::SeluOp, ops::SeluOpMaker, ops::SeluOpInferVarType,
+                  ops::SeluGradMaker);
+REGISTER_OPERATOR(selu_grad, ops::SeluGradOp);
+REGISTER_OP_CPU_KERNEL(
+    selu, ops::SeluKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SeluKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    selu_grad, ops::SeluGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SeluGradKernel<paddle::platform::CPUDeviceContext, double>);
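For readers following the math: the forward equation in the op comment above is easy to sanity-check outside the framework. Below is a minimal NumPy sketch of the same computation; the helper name `selu_ref` is illustrative and not part of this patch, and the defaults mirror the `SetDefault` values in `SeluOpMaker`:

import numpy as np

def selu_ref(x,
             scale=1.0507009873554804934193349852946,
             alpha=1.6732632423543772848170429916717):
    # x > 0: scale * x;  x <= 0: scale * (alpha * e^x - alpha)
    x = np.asarray(x, dtype=np.float64)
    return scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)

print(selu_ref([-1.0, 0.0, 2.0]))  # large negative inputs approach -scale * alpha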
diff --git a/paddle/fluid/operators/selu_op.cu b/paddle/fluid/operators/selu_op.cu
new file mode 100644
index 0000000000..fb3245ab76
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.cu
@@ -0,0 +1,22 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/selu_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    selu, ops::SeluKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SeluKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    selu_grad, ops::SeluGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SeluGradKernel<paddle::platform::CUDADeviceContext, double>);
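A note on the gradient wiring above: `SeluGradMaker` feeds `Out` (not `X`) to the backward op, which works because SELU's derivative is recoverable from its output alone. Differentiating the forward equation gives

$$
f'(x) = \lambda
\begin{cases}
  1, \qquad \quad \text{if} \ x > 0 \\
  \alpha * e^x, \quad \text{if} \ x \le 0
\end{cases}
$$

and for $x \le 0$ the forward pass yields $f(x) = \lambda \alpha e^x - \lambda \alpha$, so $\lambda \alpha e^x = f(x) + \lambda \alpha$. The backward kernel can therefore scale the incoming gradient by `scale` where the output is positive and by `Out + alpha * scale` elsewhere; this is exactly the precomputed `la_ = alpha * scale` term in `SeluGradFunctor` in the header below.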
diff --git a/paddle/fluid/operators/selu_op.h b/paddle/fluid/operators/selu_op.h
new file mode 100644
index 0000000000..bdb506885c
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.h
@@ -0,0 +1,124 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <string>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/for_range.h"
+namespace paddle {
+namespace operators {
+
+static HOSTDEVICE float real_exp(float x) { return expf(x); }
+static HOSTDEVICE double real_exp(double x) { return exp(x); }
+
+template <typename T>
+struct SeluFunctor {
+  SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr)
+      : x_data_ptr_(x_data_ptr),
+        alpha_(alpha),
+        scale_(scale),
+        y_data_ptr_(y_data_ptr) {}
+
+  HOSTDEVICE void operator()(size_t idx) const {
+    T x_ele = x_data_ptr_[idx];
+    if (x_ele <= 0) {
+      x_ele = alpha_ * real_exp(x_ele) - alpha_;
+    }
+    y_data_ptr_[idx] = scale_ * x_ele;
+  }
+  const T* x_data_ptr_;
+  const float alpha_;
+  const float scale_;
+  T* y_data_ptr_;
+};
+
+template <typename T>
+struct SeluGradFunctor {
+  SeluGradFunctor(const T* y_data_ptr, const T* dy_data_ptr, float alpha,
+                  float scale, T* dx_data_ptr)
+      : y_data_ptr_(y_data_ptr),
+        dy_data_ptr_(dy_data_ptr),
+        alpha_(alpha),
+        scale_(scale),
+        la_(alpha * scale),
+        dx_data_ptr_(dx_data_ptr) {}
+
+  HOSTDEVICE void operator()(size_t idx) const {
+    T y_ele = y_data_ptr_[idx];
+    T dy_ele = dy_data_ptr_[idx];
+
+    float tmp = scale_;
+    if (y_ele <= 0) {
+      tmp = y_ele + la_;
+    }
+    dx_data_ptr_[idx] = dy_ele * tmp;
+  }
+  const T* y_data_ptr_;
+  const T* dy_data_ptr_;
+  const float alpha_;
+  const float scale_;
+  const float la_;
+  T* dx_data_ptr_;
+};
+
+template <typename DeviceContext, typename T>
+class SeluKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    using Tensor = framework::Tensor;
+
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+
+    float alpha = context.Attr<float>("alpha");
+    float scale = context.Attr<float>("scale");
+
+    auto out_ptr = out->mutable_data<T>(context.GetPlace());
+
+    SeluFunctor<T> functor(x->data<T>(), alpha, scale, out_ptr);
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    size_t limit = static_cast<size_t>(x->numel());
+    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
+    for_range(functor);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class SeluGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    using Tensor = framework::Tensor;
+
+    auto* out = context.Input<Tensor>("Out");
+    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
+
+    float alpha = context.Attr<float>("alpha");
+    float scale = context.Attr<float>("scale");
+
+    auto dx_ptr = dx->mutable_data<T>(context.GetPlace());
+
+    SeluGradFunctor<T> functor(out->data<T>(), dout->data<T>(), alpha, scale,
+                               dx_ptr);
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    size_t limit = static_cast<size_t>(out->numel());
+    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
+    for_range(functor);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
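To convince yourself that the output-based shortcut in `SeluGradFunctor` matches the analytic derivative, here is a small self-contained NumPy check (a sketch only; the variable names and test values are illustrative):

import numpy as np

scale = 1.0507009873554804934193349852946
alpha = 1.6732632423543772848170429916717

x = np.array([-2.0, -0.5, 0.3, 1.7])
y = scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)  # forward pass

# Gradient the way SeluGradFunctor computes it: from the output only.
grad_from_out = np.where(y > 0, scale, y + alpha * scale)

# Analytic derivative computed from the input, for comparison.
grad_from_in = scale * np.where(x > 0, 1.0, alpha * np.exp(x))

print(np.allclose(grad_from_out, grad_from_in))  # True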
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 1b5009e761..f60f373163 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -110,6 +110,7 @@ __all__ = [
     'random_crop',
     'mean_iou',
     'relu',
+    'selu',
     'log',
     'crop',
     'rank_loss',
@@ -6182,6 +6183,47 @@ def relu(x, name=None):
     return out
 
 
+@templatedoc()
+def selu(x, scale=None, alpha=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x (Variable): The input tensor.
+        scale(float, None): If the scale is not set,
+            the default value is 1.0507009873554804934193349852946.
+            For more information about this value, please refer
+            to: https://arxiv.org/abs/1706.02515.
+        alpha(float, None): If the alpha is not set,
+            the default value is 1.6732632423543772848170429916717.
+            For more information about this value, please refer
+            to: https://arxiv.org/abs/1706.02515.
+        name (str|None, default None): A name for this layer. If set
+            None, the layer will be named automatically.
+
+    Returns:
+        Variable: The output tensor with the same shape as the input.
+
+    Examples:
+
+        .. code-block:: python
+
+            output = fluid.layers.selu(x)
+    """
+    helper = LayerHelper('selu', **locals())
+    dtype = helper.input_dtype(input_param_name='x')
+    out = helper.create_variable_for_type_inference(dtype)
+    attrs = {}
+    if scale is not None:
+        attrs["scale"] = scale
+    if alpha is not None:
+        attrs["alpha"] = alpha
+
+    helper.append_op(
+        type="selu", inputs={"X": x}, outputs={"Out": out}, attrs=attrs)
+    return out
+
+
 def mean_iou(input, label, num_classes):
     """
     Mean Intersection-Over-Union is a common evaluation metric for
diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py
new file mode 100644
index 0000000000..bcba0511da
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_selu_op.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import six
+from op_test import OpTest
+
+
+class SeluTest(OpTest):
+    def setUp(self):
+        self.op_type = "selu"
+        self.x_shape = [3, 5, 5, 10]
+        self.dtype = np.float32
+        self.init_x_shape()
+        self.init_dtype()
+
+        alpha = 1.6732632423543772848170429916717
+        scale = 1.0507009873554804934193349852946
+
+        x = np.random.normal(size=self.x_shape).astype(self.dtype)
+
+        # Since selu is not differentiable at zero, avoid random values
+        # that are too close to zero.
+        x[np.abs(x) < 0.005] = 0.02
+
+        x_flat = x.flatten()
+
+        for i in range(x_flat.size):
+            if x_flat[i] < 0:
+                x_flat[i] = alpha * np.exp(x_flat[i]) - alpha
+            x_flat[i] = scale * x_flat[i]
+
+        out_np = x_flat.reshape(self.x_shape)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out_np}
+
+        self.attrs = {
+            'alpha': alpha,
+            'scale': scale,
+        }
+
+    def init_x_shape(self):
+        pass
+
+    def init_dtype(self):
+        pass
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()
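Finally, a quick end-to-end sketch of the new Python API, written against the fluid API of this era; the variable names and shapes are illustrative, so treat this as an untested usage outline rather than part of the patch:

import numpy as np
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[10], dtype='float32')
y = fluid.layers.selu(x)  # scale/alpha default to the paper's values

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

x_np = np.random.randn(3, 10).astype('float32')
out, = exe.run(feed={'x': x_np}, fetch_list=[y])
print(out.shape)  # (3, 10)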