diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 6220a2409a4e300293a4d1befbeb9977f17233bf..943c6f80ebdab9340b12826d366b2c8b3e76491b 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -363,13 +363,6 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Out", "Output of LeakyRelu operator");
     AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
-    AddAttr<bool>("use_mkldnn",
-                  "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
-    AddAttr<bool>("is_test",
-                  "(bool, default false) Set to true for inference only, false "
-                  "for training. Some layers may run faster when this is true.")
-        .SetDefault(false);
     AddComment(R"DOC(
 LeakyRelu Activation Operator.
 
@@ -702,8 +695,6 @@ class LeakyReluDoubleGradMaker
     op->SetType("leaky_relu_grad_grad");
     // input1: X
     op->SetInput("X", Input("X"));
-    // input2: Out
-    op->SetInput("Out", Input("Out"));
     // X@GRAD@GRAD: ddx
     op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
     op->SetAttrMap(Attrs());
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index ba5633ea8a556cd969d12bde3e47ce96741062b8..b516fc8a418599d429e47748f53e8a6ed1f65624 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1001,7 +1001,7 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepXOut; }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
 };
 
 template <typename T>
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 0e3232019f6faba8d327c1f26a7fde5cfdd225e9..1767ebaf8c39d4eca40b03d8bdd4f6778f088de4 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -77,7 +77,8 @@ class MKLDNNActivationGradKernel
 
 template <typename T>
 void eltwise_forward(const framework::ExecutionContext &ctx,
-                     mkldnn::algorithm algorithm) {
+                     mkldnn::algorithm algorithm, const T alpha = 0,
+                     const T beta = 0) {
   PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                  "It must use CPUPlace.");
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
@@ -89,9 +90,6 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
 
-  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
-
   PADDLE_ENFORCE(
       x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
       "Input dim must be with 2, 3 or 4");
@@ -103,9 +101,10 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   bool is_test = ctx.Attr<bool>("is_test");
 
+  // TODO(jczaja): When adding leaky-relu , swish , elu make sure to extend key
+  // with alpha, beta
   std::string key = platform::MKLDNNHandler::GetHash(
-      src_tz, std::to_string(algorithm) + std::to_string(alpha) +
-                  std::to_string(beta) + ctx.op().Output("Out"));
+      src_tz, std::to_string(algorithm) + ctx.op().Output("Out"));
 
   // TODO(jczaja): Make it Thread safe
   // save input data and layout to be referred in backward path
@@ -154,7 +153,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
 template <typename T>
 void eltwise_grad(const framework::ExecutionContext &ctx,
-                  mkldnn::algorithm algorithm) {
+                  mkldnn::algorithm algorithm, const T alpha = 0,
+                  const T beta = 0) {
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
   const auto &mkldnn_engine = dev_ctx.GetEngine();
 
@@ -164,9 +164,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   const T *diff_y_data = diff_y->data<T>();
   T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());
 
-  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
-
   std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
 
   auto diff_y_format =
@@ -176,8 +173,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
       diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
 
   std::string key = platform::MKLDNNHandler::GetHash(
-      diff_dst_tz, std::to_string(algorithm) + std::to_string(alpha) +
-                       std::to_string(beta) + ctx.op().Input("Out"));
+      diff_dst_tz, std::to_string(algorithm) + ctx.op().Input("Out"));
 
   const std::string key_src_data = key + "@eltwise_fwd_src_data";
   const std::string key_src_layout = key + "@eltwise_fwd_src_layout";
@@ -277,11 +273,10 @@ namespace ops = paddle::operators;
       act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,              \
       ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
 
-#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
-  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
-  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
-  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
-  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
+#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)            \
+  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
+  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);
 
 FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
index fb9cc6b3a17a4381e71d825c47e100486f6739d7..7099387b887003a205c0dfb4c8e9c83f89e29494 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -18,7 +18,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
-from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu
+from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
 
 
@@ -29,13 +29,6 @@ class TestMKLDNNReluDim2(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
-class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
-    def setUp(self):
-        super(TestMKLDNNLeakyReluDim2, self).setUp()
-
-        self.attrs = {"use_mkldnn": True}
-
-
 class TestMKLDNNTanhDim2(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim2, self).setUp()
@@ -70,20 +63,6 @@ class TestMKLDNNReluDim4(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
-class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
-    def setUp(self):
-        super(TestMKLDNNLeakyReluDim4, self).setUp()
-
-        x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
-        # The same reason with TestAbs
-        x[np.abs(x) < 0.005] = 0.02
-        out = np.maximum(x, 0.02 * x)
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.outputs = {'Out': out}
-        self.attrs = {"use_mkldnn": True}
-
-
 class TestMKLDNNTanhDim4(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim4, self).setUp()
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 0a4f2bf1792ef42ce8ef6189def4249085100dc9..4d66b7a989732e37c48c73b9617943874ad07bba 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -367,25 +367,6 @@ class TestRelu(TestActivation):
         self.check_grad(['X'], 'Out', max_relative_error=0.007)
 
 
-class TestLeakyRelu(TestActivation):
-    def setUp(self):
-        self.op_type = "leaky_relu"
-        self.init_dtype()
-
-        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
-        # The same reason with TestAbs
-        x[np.abs(x) < 0.005] = 0.02
-        out = np.maximum(x, 0.02 * x)
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.outputs = {'Out': out}
-
-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
-
-
 class TestGelu(TestActivation):
     def setUp(self):
         self.op_type = "gelu"