From 3b954e1ddc9ce375a0e4b258cc57926b76f3e74a Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Thu, 12 Oct 2017 21:57:21 -0700 Subject: [PATCH] Adding Hard Sigmoid Activation (#4771) * Adding Hard Sigmoid Activation * Adding a comment for slope to be only positive * Fixing grammatical mistake in comment --- paddle/operators/activation_op.cc | 35 ++++++++ paddle/operators/activation_op.h | 79 +++++++++++++------ .../v2/framework/tests/test_activation_op.py | 28 +++++++ 3 files changed, 119 insertions(+), 23 deletions(-) diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index cba57ba57f5..84c3775b4fc 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { } }; +template +class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { + public: + HardSigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of HardSigmoid operator"); + AddOutput("Y", "Output of HardSigmoid operator"); + AddComment(R"DOC( +Hard Sigmoid activation operator. + +Segment-wise linear approximation of sigmoid[1]. +This is much faster than sigmoid. + +hard_sigmoid = max(0, min(1, slope * x + offset)) + +The slope should be positive. The offset can be either positive or negative. +The default slope and offset are set from [1]. +It is recommended to use the defaults for this activation. 
+ +References: + [1] Noisy Activation Functions + (https://arxiv.org/abs/1603.00391) + + )DOC"); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(static_cast(0.2)); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(static_cast(0.5)); + } +}; + } // namespace operators } // namespace paddle @@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, thresholded_relu_grad, ops::ActivationOpGrad); +REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, + hard_sigmoid_grad, ops::ActivationOpGrad); + #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL( \ act_type, \ diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 502c33be103..4f4eb44fedc 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor { } }; +template +struct HardSigmoidFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y) const { + auto temp = x * static_cast(slope) + static_cast(offset); + y.device(d) = temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); + } +}; + +template +struct HardSigmoidGradFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + dx.device(d) = + dy * + ((y > static_cast(0)) * (y < static_cast(1))).template cast() * + static_cast(slope); + } +}; + } // namespace operators } // namespace paddle -#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ - __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ - 
__macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ - __macro(exp, ExpFunctor, ExpGradFunctor); \ - __macro(relu, ReluFunctor, ReluGradFunctor); \ - __macro(tanh, TanhFunctor, TanhGradFunctor); \ - __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ - __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ - __macro(abs, AbsFunctor, AbsGradFunctor); \ - __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ - __macro(log, LogFunctor, LogGradFunctor); \ - __macro(square, SquareFunctor, SquareGradFunctor); \ - __macro(brelu, BReluFunctor, BReluGradFunctor); \ - __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ - __macro(pow, PowFunctor, PowGradFunctor); \ - __macro(stanh, STanhFunctor, STanhGradFunctor); \ - __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ - __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ - __macro(relu6, Relu6Functor, Relu6GradFunctor); \ - __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ - __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ - __macro(elu, ELUFunctor, ELUGradFunctor); \ - __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ +#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ + __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ + __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ + __macro(exp, ExpFunctor, ExpGradFunctor); \ + __macro(relu, ReluFunctor, ReluGradFunctor); \ + __macro(tanh, TanhFunctor, TanhGradFunctor); \ + __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ + __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ + __macro(abs, AbsFunctor, AbsGradFunctor); \ + __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ + __macro(log, LogFunctor, LogGradFunctor); \ + __macro(square, SquareFunctor, SquareGradFunctor); \ + __macro(brelu, BReluFunctor, BReluGradFunctor); \ + __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ + __macro(pow, PowFunctor, PowGradFunctor); \ + __macro(stanh, 
STanhFunctor, STanhGradFunctor); \ + __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ + __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ + __macro(relu6, Relu6Functor, Relu6GradFunctor); \ + __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ + __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ + __macro(elu, ELUFunctor, ELUGradFunctor); \ + __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ + __macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \ __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py index 3acd00e3521..5831b880e4c 100644 --- a/python/paddle/v2/framework/tests/test_activation_op.py +++ b/python/paddle/v2/framework/tests/test_activation_op.py @@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest): self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) +class TestHardSigmoid(OpTest): + def setUp(self): + self.op_type = "hard_sigmoid" + self.relative_error = 0.002 + + X = np.random.uniform(-5, 5, [2, 2]).astype("float32") + slope = 0.2 + offset = 0.5 + lower_threshold = -offset / slope + upper_threshold = (1 - offset) / slope + + self.inputs = {'X': X} + # Same reason as TestAbs + X[np.abs(X - lower_threshold) < self.relative_error] = \ + lower_threshold + 0.2 + X[np.abs(X - upper_threshold) < self.relative_error] = \ + upper_threshold - 0.2 + + temp = X * slope + offset + self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.002) + + if __name__ == "__main__": unittest.main() -- GitLab