提交 9995aed1 编写于 作者: K kexinzhao 提交者: GitHub

Implementing Softplus operator (#4690)

* implementing softplus

* small fix

* small fix

* small fix

* small fix
上级 c3bf3326
......@@ -206,6 +206,17 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftplusOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Softplus operator");
AddOutput("Y", "Output of Softplus operator");
AddComment("Softplus activation operator, softplus(x) = log(1 + exp(x))");
}
};
class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftsignOpMaker(framework::OpProto *proto,
......@@ -351,6 +362,9 @@ REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
ops::ActivationOpGrad);
REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad,
ops::ActivationOpGrad);
REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
ops::ActivationOpGrad);
......
......@@ -407,6 +407,33 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
}
};
// softplus(x) = log(1 + exp(x))
// When x is a very large positive number, exp(x) may explode to inf,
// Using trick below for numerical stability
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
template <typename T>
struct SoftplusFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
auto temp = x.cwiseMax(static_cast<T>(0)); // temp = max(x, 0)
y.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log());
}
};
// d(softplus(x))/dx = exp(x) / (1 + exp(x))
// For numerical stability:
// d(softplus(x))/dx = exp(x - max(x, 0)) / (exp(-max(x, 0)) +
// exp(x - max(x, 0)))
template <typename T>
struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
auto temp = x.cwiseMax(static_cast<T>(0)); // temp = max(x, 0)
dx.device(d) = dy * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp()));
}
};
// softsign(x) = x / (1 + |x|)
template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> {
......@@ -582,6 +609,7 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
......
......@@ -331,6 +331,21 @@ class TestSTanh(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSoftplus(OpTest):
def setUp(self):
self.op_type = "softplus"
self.inputs = {
'X': np.random.uniform(-1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.log(1 + np.exp(self.inputs['X']))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSoftsign(OpTest):
def setUp(self):
self.op_type = "softsign"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册