From 82773477ae6da1bdeba9f81ded8dd7f76b359f38 Mon Sep 17 00:00:00 2001
From: chengduo
Date: Thu, 15 Nov 2018 19:07:09 +0800
Subject: [PATCH] Add selu (#14415)

* add selu

* use for range
test=develop

* add API
test=develop

* follow comment
test=develop

* update API.spec
test=develop
---
 paddle/fluid/API.spec                         |   1 +
 paddle/fluid/operators/selu_op.cc             | 135 ++++++++++++++++++
 paddle/fluid/operators/selu_op.cu             |  22 +++
 paddle/fluid/operators/selu_op.h              | 124 ++++++++++++++++
 python/paddle/fluid/layers/nn.py              |  42 ++++++
 .../fluid/tests/unittests/test_selu_op.py     |  71 +++++++++
 6 files changed, 395 insertions(+)
 create mode 100644 paddle/fluid/operators/selu_op.cc
 create mode 100644 paddle/fluid/operators/selu_op.cu
 create mode 100644 paddle/fluid/operators/selu_op.h
 create mode 100644 python/paddle/fluid/tests/unittests/test_selu_op.py

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 3378d210cd..da835b3305 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -128,6 +128,7 @@ paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates',
 paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
diff --git a/paddle/fluid/operators/selu_op.cc b/paddle/fluid/operators/selu_op.cc
new file mode 100644
index 0000000000..67fca18000
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.cc
@@ -0,0 +1,135 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/selu_op.h"
+#include <string>
+
+namespace paddle {
+namespace operators {
+
+class SeluOp : public framework::OperatorWithKernel {
+ public:
+  SeluOp(const std::string &type, const framework::VariableNameMap &inputs,
+         const framework::VariableNameMap &outputs,
+         const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SeluOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SeluOp should not be null.");
+
+    ctx->ShareDim("X", /*->*/ "Out");
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::GetDataTypeOfVar(ctx.InputVar("X")), ctx.GetPlace());
+  }
+};
+
+class SeluOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
+ protected:
+  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
+      const override {
+    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Out"}};
+  }
+};
+
+class SeluOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "The input tensor of selu operator.");
+    AddOutput("Out", "The output tensor of selu operator.");
+    AddAttr<float>("scale",
+                   "(float) the default value is 1.0507~. For more "
+                   "information about this value, please refer to: "
+                   "https://arxiv.org/abs/1706.02515.")
+        .SetDefault(1.0507009873554804934193349852946);
+    AddAttr<float>("alpha",
+                   "(float) the default value is 1.6732~. For more "
+                   "information about this value, please refer to: "
+                   "https://arxiv.org/abs/1706.02515.")
+        .SetDefault(1.6732632423543772848170429916717);
+    AddComment(R"DOC(
+Selu Operator.
+
+The equation is:
+$$
+f(x) = \lambda *
+\begin{cases}
+  \quad \quad x, \quad \quad \quad \text{if} \ x > 0 \\
+  \alpha * e^x - \alpha, \qquad \text{if} \ x \le 0
+\end{cases}
+$$
+
+The input `X` can carry the LoD (Level of Detail) information,
+or not. And the output shares the LoD information with the input `X`.
+)DOC");
+  }
+};
+
+class SeluGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto *grad_op = new framework::OpDesc();
+    grad_op->SetType("selu_grad");
+    grad_op->SetInput("Out", Output("Out"));
+    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    grad_op->SetAttrMap(this->Attrs());
+    return std::unique_ptr<framework::OpDesc>(grad_op);
+  }
+};
+
+class SeluGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input(Out@GRAD) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null");
+    auto x_grad_name = framework::GradVarName("X");
+    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("Out"));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::GetDataTypeOfVar(ctx.InputVar("Out")), ctx.GetPlace());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(selu, ops::SeluOp, ops::SeluOpMaker, ops::SeluOpInferVarType,
+                  ops::SeluGradMaker);
+REGISTER_OPERATOR(selu_grad, ops::SeluGradOp);
+REGISTER_OP_CPU_KERNEL(
+    selu, ops::SeluKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SeluKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    selu_grad, ops::SeluGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SeluGradKernel<paddle::platform::CPUDeviceContext, double>);
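For readers following the math: the forward equation in the op comment above is easy to sanity-check outside the framework. Below is a minimal NumPy sketch of the same computation; the helper name `selu_ref` is illustrative and not part of this patch, and the defaults mirror the `SetDefault` values in `SeluOpMaker`:

import numpy as np

def selu_ref(x,
             scale=1.0507009873554804934193349852946,
             alpha=1.6732632423543772848170429916717):
    # x > 0: scale * x;  x <= 0: scale * (alpha * e^x - alpha)
    x = np.asarray(x, dtype=np.float64)
    return scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)

print(selu_ref([-1.0, 0.0, 2.0]))  # large negative inputs approach -scale * alpha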
diff --git a/paddle/fluid/operators/selu_op.cu b/paddle/fluid/operators/selu_op.cu
new file mode 100644
index 0000000000..fb3245ab76
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.cu
@@ -0,0 +1,22 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/selu_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    selu, ops::SeluKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SeluKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    selu_grad, ops::SeluGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SeluGradKernel<paddle::platform::CUDADeviceContext, double>);
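A note on the gradient wiring above: `SeluGradMaker` feeds `Out` (not `X`) to the backward op, which works because SELU's derivative is recoverable from its output alone. Differentiating the forward equation gives

$$
f'(x) = \lambda
\begin{cases}
  1, \qquad \quad \text{if} \ x > 0 \\
  \alpha * e^x, \quad \text{if} \ x \le 0
\end{cases}
$$

and for $x \le 0$ the forward pass yields $f(x) = \lambda \alpha e^x - \lambda \alpha$, so $\lambda \alpha e^x = f(x) + \lambda \alpha$. The backward kernel can therefore scale the incoming gradient by `scale` where the output is positive and by `Out + alpha * scale` elsewhere; this is exactly the precomputed `la_ = alpha * scale` term in `SeluGradFunctor` in the header below.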
diff --git a/paddle/fluid/operators/selu_op.h b/paddle/fluid/operators/selu_op.h
new file mode 100644
index 0000000000..bdb506885c
--- /dev/null
+++ b/paddle/fluid/operators/selu_op.h
@@ -0,0 +1,124 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <string>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/for_range.h"
+namespace paddle {
+namespace operators {
+
+static HOSTDEVICE float real_exp(float x) { return expf(x); }
+static HOSTDEVICE double real_exp(double x) { return exp(x); }
+
+template <typename T>
+struct SeluFunctor {
+  SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr)
+      : x_data_ptr_(x_data_ptr),
+        alpha_(alpha),
+        scale_(scale),
+        y_data_ptr_(y_data_ptr) {}
+
+  HOSTDEVICE void operator()(size_t idx) const {
+    T x_ele = x_data_ptr_[idx];
+    if (x_ele <= 0) {
+      x_ele = alpha_ * real_exp(x_ele) - alpha_;
+    }
+    y_data_ptr_[idx] = scale_ * x_ele;
+  }
+  const T* x_data_ptr_;
+  const float alpha_;
+  const float scale_;
+  T* y_data_ptr_;
+};
+
+template <typename T>
+struct SeluGradFunctor {
+  SeluGradFunctor(const T* y_data_ptr, const T* dy_data_ptr, float alpha,
+                  float scale, T* dx_data_ptr)
+      : y_data_ptr_(y_data_ptr),
+        dy_data_ptr_(dy_data_ptr),
+        alpha_(alpha),
+        scale_(scale),
+        la_(alpha * scale),
+        dx_data_ptr_(dx_data_ptr) {}
+
+  HOSTDEVICE void operator()(size_t idx) const {
+    T y_ele = y_data_ptr_[idx];
+    T dy_ele = dy_data_ptr_[idx];
+
+    float tmp = scale_;
+    if (y_ele <= 0) {
+      tmp = y_ele + la_;
+    }
+    dx_data_ptr_[idx] = dy_ele * tmp;
+  }
+  const T* y_data_ptr_;
+  const T* dy_data_ptr_;
+  const float alpha_;
+  const float scale_;
+  const float la_;
+  T* dx_data_ptr_;
+};
+
+template <typename DeviceContext, typename T>
+class SeluKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    using Tensor = framework::Tensor;
+
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+
+    float alpha = context.Attr<float>("alpha");
+    float scale = context.Attr<float>("scale");
+
+    auto out_ptr = out->mutable_data<T>(context.GetPlace());
+
+    SeluFunctor<T> functor(x->data<T>(), alpha, scale, out_ptr);
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    size_t limit = static_cast<size_t>(x->numel());
+    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
+    for_range(functor);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class SeluGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    using Tensor = framework::Tensor;
+
+    auto* out = context.Input<Tensor>("Out");
+    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
+
+    float alpha = context.Attr<float>("alpha");
+    float scale = context.Attr<float>("scale");
+
+    auto dx_ptr = dx->mutable_data<T>(context.GetPlace());
+
+    SeluGradFunctor<T> functor(out->data<T>(), dout->data<T>(), alpha, scale,
+                               dx_ptr);
+
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+    size_t limit = static_cast<size_t>(out->numel());
+    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
+    for_range(functor);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
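To convince yourself that the output-based shortcut in `SeluGradFunctor` matches the analytic derivative, here is a small self-contained NumPy check (a sketch only; the variable names and test values are illustrative):

import numpy as np

scale = 1.0507009873554804934193349852946
alpha = 1.6732632423543772848170429916717

x = np.array([-2.0, -0.5, 0.3, 1.7])
y = scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)  # forward pass

# Gradient the way SeluGradFunctor computes it: from the output only.
grad_from_out = np.where(y > 0, scale, y + alpha * scale)

# Analytic derivative computed from the input, for comparison.
grad_from_in = scale * np.where(x > 0, 1.0, alpha * np.exp(x))

print(np.allclose(grad_from_out, grad_from_in))  # True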
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 1b5009e761..f60f373163 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -110,6 +110,7 @@ __all__ = [
     'random_crop',
     'mean_iou',
     'relu',
+    'selu',
     'log',
     'crop',
     'rank_loss',
@@ -6182,6 +6183,47 @@ def relu(x, name=None):
     return out
 
 
+@templatedoc()
+def selu(x, scale=None, alpha=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x (Variable): The input tensor.
+        scale(float, None): If the scale is not set,
+            the default value is 1.0507009873554804934193349852946.
+            For more information about this value, please refer
+            to: https://arxiv.org/abs/1706.02515.
+        alpha(float, None): If the alpha is not set,
+            the default value is 1.6732632423543772848170429916717.
+            For more information about this value, please refer
+            to: https://arxiv.org/abs/1706.02515.
+        name (str|None, default None): A name for this layer. If set
+            None, the layer will be named automatically.
+
+    Returns:
+        Variable: The output tensor with the same shape as the input.
+
+    Examples:
+
+        .. code-block:: python
+
+            output = fluid.layers.selu(x)
+    """
+    helper = LayerHelper('selu', **locals())
+    dtype = helper.input_dtype(input_param_name='x')
+    out = helper.create_variable_for_type_inference(dtype)
+    attrs = {}
+    if scale is not None:
+        attrs["scale"] = scale
+    if alpha is not None:
+        attrs["alpha"] = alpha
+
+    helper.append_op(
+        type="selu", inputs={"X": x}, outputs={"Out": out}, attrs=attrs)
+    return out
+
+
 def mean_iou(input, label, num_classes):
     """
     Mean Intersection-Over-Union is a common evaluation metric for
diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py
new file mode 100644
index 0000000000..bcba0511da
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_selu_op.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import six
+from op_test import OpTest
+
+
+class SeluTest(OpTest):
+    def setUp(self):
+        self.op_type = "selu"
+        self.x_shape = [3, 5, 5, 10]
+        self.dtype = np.float32
+        self.init_x_shape()
+        self.init_dtype()
+
+        alpha = 1.6732632423543772848170429916717
+        scale = 1.0507009873554804934193349852946
+
+        x = np.random.normal(size=self.x_shape).astype(self.dtype)
+
+        # Since selu is not differentiable at zero, avoid random values
+        # that are too close to zero.
+        x[np.abs(x) < 0.005] = 0.02
+
+        x_flat = x.flatten()
+
+        for i in range(x_flat.size):
+            if x_flat[i] < 0:
+                x_flat[i] = alpha * np.exp(x_flat[i]) - alpha
+            x_flat[i] = scale * x_flat[i]
+
+        out_np = x_flat.reshape(self.x_shape)
+
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out_np}
+
+        self.attrs = {
+            'alpha': alpha,
+            'scale': scale,
+        }
+
+    def init_x_shape(self):
+        pass
+
+    def init_dtype(self):
+        pass
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+if __name__ == "__main__":
+    unittest.main()
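Finally, a quick end-to-end sketch of the new Python API, written against the fluid API of this era; the variable names and shapes are illustrative, so treat this as an untested usage outline rather than part of the patch:

import numpy as np
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[10], dtype='float32')
y = fluid.layers.selu(x)  # scale/alpha default to the paper's values

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

x_np = np.random.randn(3, 10).astype('float32')
out, = exe.run(feed={'x': x_np}, fetch_list=[y])
print(out.shape)  # (3, 10)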