diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 943c6f80ebdab9340b12826d366b2c8b3e76491b..6220a2409a4e300293a4d1befbeb9977f17233bf 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -363,6 +363,13 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Out", "Output of LeakyRelu operator");
     AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false);
+    AddAttr<bool>("is_test",
+                  "(bool, default false) Set to true for inference only, false "
+                  "for training. Some layers may run faster when this is true.")
+        .SetDefault(false);
     AddComment(R"DOC(
 LeakyRelu Activation Operator.
 
@@ -695,6 +702,8 @@ class LeakyReluDoubleGradMaker
     op->SetType("leaky_relu_grad_grad");
     // input1: X
     op->SetInput("X", Input("X"));
+    // input2: Out
+    op->SetInput("Out", Input("Out"));
     // X@GRAD@GRAD: ddx
     op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
     op->SetAttrMap(Attrs());
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index b516fc8a418599d429e47748f53e8a6ed1f65624..ba5633ea8a556cd969d12bde3e47ce96741062b8 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1001,7 +1001,7 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepXOut; }
 };
 
 template <typename T>
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 1767ebaf8c39d4eca40b03d8bdd4f6778f088de4..0e3232019f6faba8d327c1f26a7fde5cfdd225e9 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -77,8 +77,7 @@ class MKLDNNActivationGradKernel
 
 template <typename T>
 void eltwise_forward(const framework::ExecutionContext &ctx,
-                     mkldnn::algorithm algorithm, const T alpha = 0,
-                     const T beta = 0) {
+                     mkldnn::algorithm algorithm) {
   PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                  "It must use CPUPlace.");
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
@@ -90,6 +89,9 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
 
+  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
+  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
+
   PADDLE_ENFORCE(
       x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
       "Input dim must be with 2, 3 or 4");
@@ -101,10 +103,9 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   bool is_test = ctx.Attr<bool>("is_test");
 
-  // TODO(jczaja): When adding leaky-relu , swish , elu make sure to extend key
-  // with alpha, beta
   std::string key = platform::MKLDNNHandler::GetHash(
-      src_tz, std::to_string(algorithm) + ctx.op().Output("Out"));
+      src_tz, std::to_string(algorithm) + std::to_string(alpha) +
+                  std::to_string(beta) + ctx.op().Output("Out"));
 
   // TODO(jczaja): Make it Thread safe
   // save input data and layout to be referred in backward path
@@ -153,8 +154,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
 template <typename T>
 void eltwise_grad(const framework::ExecutionContext &ctx,
-                  mkldnn::algorithm algorithm, const T alpha = 0,
-                  const T beta = 0) {
+                  mkldnn::algorithm algorithm) {
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
   const auto &mkldnn_engine = dev_ctx.GetEngine();
 
@@ -164,6 +164,9 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   const T *diff_y_data = diff_y->data<T>();
   T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());
 
+  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
+  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
+
   std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
 
   auto diff_y_format =
@@ -173,7 +176,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
       diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
 
   std::string key = platform::MKLDNNHandler::GetHash(
-      diff_dst_tz, std::to_string(algorithm) + ctx.op().Input("Out"));
+      diff_dst_tz, std::to_string(algorithm) + std::to_string(alpha) +
+                       std::to_string(beta) + ctx.op().Input("Out"));
 
   const std::string key_src_data = key + "@eltwise_fwd_src_data";
   const std::string key_src_layout = key + "@eltwise_fwd_src_layout";
@@ -273,10 +277,11 @@ namespace ops = paddle::operators;
       act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,                 \
       ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
 
-#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)            \
-  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
-  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
-  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
+#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
+  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
+  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
+  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);
 
 FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
index 7099387b887003a205c0dfb4c8e9c83f89e29494..fb9cc6b3a17a4381e71d825c47e100486f6739d7 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -18,7 +18,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
-from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
+from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
 
 
@@ -29,6 +29,13 @@ class TestMKLDNNReluDim2(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
+class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
+    def setUp(self):
+        super(TestMKLDNNLeakyReluDim2, self).setUp()
+
+        self.attrs = {"use_mkldnn": True}
+
+
 class TestMKLDNNTanhDim2(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim2, self).setUp()
@@ -63,6 +70,20 @@ class TestMKLDNNReluDim4(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
+class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
+    def setUp(self):
+        super(TestMKLDNNLeakyReluDim4, self).setUp()
+
+        x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
+        # The same reason with TestAbs
+        x[np.abs(x) < 0.005] = 0.02
+        out = np.maximum(x, 0.02 * x)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True}
+
+
 class TestMKLDNNTanhDim4(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim4, self).setUp()
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 4d66b7a989732e37c48c73b9617943874ad07bba..0a4f2bf1792ef42ce8ef6189def4249085100dc9 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -367,6 +367,25 @@ class TestRelu(TestActivation):
         self.check_grad(['X'], 'Out', max_relative_error=0.007)
 
 
+class TestLeakyRelu(TestActivation):
+    def setUp(self):
+        self.op_type = "leaky_relu"
+        self.init_dtype()
+
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        # The same reason with TestAbs
+        x[np.abs(x) < 0.005] = 0.02
+        out = np.maximum(x, 0.02 * x)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
+        self.check_grad(['X'], 'Out', max_relative_error=0.007)
+
+
 class TestGelu(TestActivation):
     def setUp(self):
         self.op_type = "gelu"
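
For reference, below is a minimal NumPy sketch (not part of the patch) of the leaky_relu semantics the new tests exercise. The helper names `leaky_relu` and `leaky_relu_grad` are illustrative only; `alpha` mirrors the operator's `alpha` attribute (default 0.02), and the backward rule is the piecewise slope computed by `LeakyReluGradFunctor`, which the MKLDNN kernel now parameterizes through the same attribute.

```python
import numpy as np


def leaky_relu(x, alpha=0.02):
    # Forward: out = max(x, alpha * x), same reference as TestLeakyRelu / TestMKLDNNLeakyReluDim4.
    return np.maximum(x, alpha * x)


def leaky_relu_grad(x, dout, alpha=0.02):
    # Backward: gradient passes through unchanged where x >= 0, scaled by alpha where x < 0.
    return dout * np.where(x >= 0, 1.0, alpha)


# Usage sketch mirroring the unit tests' setup.
x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
x[np.abs(x) < 0.005] = 0.02  # keep inputs away from 0, where the slope changes
out = leaky_relu(x)
dx = leaky_relu_grad(x, np.ones_like(out))
```

This also illustrates why the MKLDNN cache key now includes `alpha` and `beta`: two leaky_relu ops with different slopes must not reuse the same cached primitive.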