diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 6220a2409a4e300293a4d1befbeb9977f17233bf..943c6f80ebdab9340b12826d366b2c8b3e76491b 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -363,13 +363,6 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Out", "Output of LeakyRelu operator");
     AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
-    AddAttr<bool>("use_mkldnn",
-                  "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
-    AddAttr<bool>("is_test",
-                  "(bool, default false) Set to true for inference only, false "
-                  "for training. Some layers may run faster when this is true.")
-        .SetDefault(false);
     AddComment(R"DOC(
 LeakyRelu Activation Operator.
 
@@ -702,8 +695,6 @@ class LeakyReluDoubleGradMaker
     op->SetType("leaky_relu_grad_grad");
     // input1: X
     op->SetInput("X", Input("X"));
-    // input2: Out
-    op->SetInput("Out", Input("Out"));
     // X@GRAD@GRAD: ddx
     op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
     op->SetAttrMap(Attrs());
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index ba5633ea8a556cd969d12bde3e47ce96741062b8..b516fc8a418599d429e47748f53e8a6ed1f65624 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1001,7 +1001,7 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepXOut; }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
 };
 
 template <typename T>
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 0e3232019f6faba8d327c1f26a7fde5cfdd225e9..1767ebaf8c39d4eca40b03d8bdd4f6778f088de4 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -77,7 +77,8 @@ class MKLDNNActivationGradKernel
 
 template <typename T>
 void eltwise_forward(const framework::ExecutionContext &ctx,
-                     mkldnn::algorithm algorithm) {
+                     mkldnn::algorithm algorithm, const T alpha = 0,
+                     const T beta = 0) {
   PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                  "It must use CPUPlace.");
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
@@ -89,9 +90,6 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   const T *x_data = x->data<T>();
   T *y_data = y->mutable_data<T>(ctx.GetPlace());
 
-  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
-
   PADDLE_ENFORCE(
       x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
       "Input dim must be with 2, 3 or 4");
@@ -103,9 +101,10 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   bool is_test = ctx.Attr<bool>("is_test");
 
+  // TODO(jczaja): When adding leaky-relu , swish , elu make sure to extend key
+  // with alpha, beta
   std::string key = platform::MKLDNNHandler::GetHash(
-      src_tz, std::to_string(algorithm) + std::to_string(alpha) +
-                  std::to_string(beta) + ctx.op().Output("Out"));
+      src_tz, std::to_string(algorithm) + ctx.op().Output("Out"));
 
   // TODO(jczaja): Make it Thread safe
   // save input data and layout to be referred in backward path
@@ -154,7 +153,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
 template <typename T>
 void eltwise_grad(const framework::ExecutionContext &ctx,
-                  mkldnn::algorithm algorithm) {
+                  mkldnn::algorithm algorithm, const T alpha = 0,
+                  const T beta = 0) {
   auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
   const auto &mkldnn_engine = dev_ctx.GetEngine();
 
@@ -164,9 +164,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   const T *diff_y_data = diff_y->data<T>();
   T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());
 
-  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
-
   std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
 
   auto diff_y_format =
@@ -176,8 +173,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
       diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
 
   std::string key = platform::MKLDNNHandler::GetHash(
-      diff_dst_tz, std::to_string(algorithm) + std::to_string(alpha) +
-                       std::to_string(beta) + ctx.op().Input("Out"));
+      diff_dst_tz, std::to_string(algorithm) + ctx.op().Input("Out"));
 
   const std::string key_src_data = key + "@eltwise_fwd_src_data";
   const std::string key_src_layout = key + "@eltwise_fwd_src_layout";
@@ -277,11 +273,10 @@ namespace ops = paddle::operators;
       act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,              \
       ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
 
-#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
-  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
-  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
-  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
-  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
+#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)            \
+  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
+  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);
 
 FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
index fb9cc6b3a17a4381e71d825c47e100486f6739d7..7099387b887003a205c0dfb4c8e9c83f89e29494 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -18,7 +18,7 @@ import unittest
 import numpy as np
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
-from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu
+from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
 
 
@@ -29,13 +29,6 @@ class TestMKLDNNReluDim2(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
-class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
-    def setUp(self):
-        super(TestMKLDNNLeakyReluDim2, self).setUp()
-
-        self.attrs = {"use_mkldnn": True}
-
-
 class TestMKLDNNTanhDim2(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim2, self).setUp()
@@ -70,20 +63,6 @@ class TestMKLDNNReluDim4(TestRelu):
         self.attrs = {"use_mkldnn": True}
 
 
-class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
-    def setUp(self):
-        super(TestMKLDNNLeakyReluDim4, self).setUp()
-
-        x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
-        # The same reason with TestAbs
-        x[np.abs(x) < 0.005] = 0.02
-        out = np.maximum(x, 0.02 * x)
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.outputs = {'Out': out}
-        self.attrs = {"use_mkldnn": True}
-
-
 class TestMKLDNNTanhDim4(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim4, self).setUp()
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 0a4f2bf1792ef42ce8ef6189def4249085100dc9..4d66b7a989732e37c48c73b9617943874ad07bba 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -367,25 +367,6 @@ class TestRelu(TestActivation):
         self.check_grad(['X'], 'Out', max_relative_error=0.007)
 
 
-class TestLeakyRelu(TestActivation):
-    def setUp(self):
-        self.op_type = "leaky_relu"
-        self.init_dtype()
-
-        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
-        # The same reason with TestAbs
-        x[np.abs(x) < 0.005] = 0.02
-        out = np.maximum(x, 0.02 * x)
-
-        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.outputs = {'Out': out}
-
-    def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
-
-
 class TestGelu(TestActivation):
     def setUp(self):
         self.op_type = "gelu"