diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
index ddad70a6a5f31ccb974f78ca35f045c59f45b8be..46201d5e5c9a12bdff92ff6aed53fe9ac81788e2 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
@@ -47,13 +47,13 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
     float scale_o = ctx.Attr<float>("Scale_out");
     int axis = ctx.Attr<int>("axis");
 
-    platform::BinaryMKLDNNHandler<T> handler(
-        BINARY_OP, axis, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z,
-        scale_x, scale_y, scale_o, ctx.OutputName("Out"));
+    platform::BinaryMKLDNNHandler<T> handler( BINARY_OP, axis, mkldnn_engine, ctx.GetPlace(), x, y, z, scale_x, scale_y, scale_o);
 
     const auto src_x_memory = handler.AcquireSrcMemory(x);
     const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
-    const auto dst_memory = handler.AcquireDstMemory(z);
+    // For in-place execution, src and dst are the same memory object
+    auto dst_memory =
+        x->IsSharedBufferWith(*z) ? src_x_memory : handler.AcquireDstMemory(z);
 
     const auto binary_prim = handler.AcquireForwardPrimitive();
 
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
index 1c246e8d18937087639129d32001a297eec3ca42..508867ed37434eaa536f1819cc83e8b69cb2464b 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
@@ -48,9 +48,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     if (dx) {
       // dx = dout*y
       platform::BinaryMKLDNNHandler<T> handler(
-          dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine,
-          ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f,
-          ctx.InputName(framework::GradVarName("Out")));
+          dnnl::algorithm::binary_mul, axis, mkldnn_engine,
+          ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f);
 
       const auto src_dout_memory = handler.AcquireSrcMemory(dout);
       const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
@@ -75,9 +74,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
       // Handler is having nullptr passed instead of output tensor as
       // we want Dst buffer to be allocated by oneDNN not to use Tensor
       platform::BinaryMKLDNNHandler<T> handler(
-          dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine,
-          ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f,
-          ctx.InputName(framework::GradVarName("Out")));
+          dnnl::algorithm::binary_mul, axis, mkldnn_engine,
+          ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f);
 
       const auto src_dout_memory = handler.AcquireSrcMemory(dout);
       const auto src_x_memory = handler.AcquireSecondSrcMemory(x);
diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
index 91cc466207217aacc3f8ad4d9f5f544e5b48e0b7..fc647e184cb816b5df685d8c3369d3d9bf9a99bc 100644
--- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -57,9 +57,8 @@ class SoftmaxMKLDNNHandler
                        platform::Place cpu_place, const Tensor* out,
                        const Tensor* out_grad, Tensor* in_x_grad,
                        const std::string& unique_name)
-      : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
-                                 mkldnn::softmax_backward>(
-            dev_ctx, mkldnn_engine, cpu_place) {
+      : platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
+                                 mkldnn::softmax_backward>(mkldnn_engine, cpu_place) {
       PADDLE_ENFORCE_EQ(
           out_grad->dims(), in_x_grad->dims(),
           platform::errors::InvalidArgument("The shape of softmax_grad's input "
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index afdd41f706a65d509dc0bb4fed3778a20d05f0f0..f846f7010e095a654c9b78cc2ad8785dacbfbcc6 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -49,18 +49,17 @@ class MKLDNNHandlerNoCachingT {
   }
 
   std::shared_ptr<TForward> AcquireForwardPrimitive() {
-     return  forward_p = std::make_shared<TForward>(*fwd_pd_);
+     return std::make_shared<TForward>(*fwd_pd_);
   }
 
   std::shared_ptr<TBackward> AcquireBackwardPrimitive() {
-     return  backward_p = std::make_shared<TBackward>(*bwd_pd_);
+     return std::make_shared<TBackward>(*bwd_pd_);
   }
 
   std::shared_ptr<TBackward_params> AcquireBackwardWeightsPrimitive() {
       PADDLE_ENFORCE_NOT_NULL(bwd_w_pd_, platform::errors::Unavailable(
                                              "Error: BWD_PD should be set when "
-                                             "getting BWD prim witk key: %s .",
-                                             key_p));
+                                             "getting BWD prim ."));
      return std::make_shared<TBackward_params>(*bwd_w_pd_);
   }
 
@@ -802,19 +801,13 @@ class MKLDNNHandler {
 };
 
 template <typename T>
-class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::binary> {
+class BinaryMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
  public:
   BinaryMKLDNNHandler(const dnnl::algorithm algo, const int axis,
-                      const MKLDNNDeviceContext& dev_ctx,
                       const mkldnn::engine engine, platform::Place cpu_place,
                       const Tensor* x, const Tensor* y, Tensor* z,
-                      float scale_x, float scale_y, float scale_z,
-                      const std::string& uniq_name)
-      : platform::MKLDNNHandlerT<T, dnnl::binary>(
-            dev_ctx, engine, cpu_place,
-            platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
-                                uniq_name)) {
-    if (!this->isCached()) {
+                      float scale_x, float scale_y, float scale_z)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
       PADDLE_ENFORCE_EQ(
           x->layout(), DataLayout::kMKLDNN,
           platform::errors::InvalidArgument("Wrong layout set for X tensor."));
@@ -858,7 +851,6 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::binary> {
       auto attributes = CreateAttributes(algo, scale_x, scale_y, scale_z);
       this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md,
                                               src1_md, dst_md);
-    }
   }
 
   std::shared_ptr<mkldnn::memory> AcquireSecondSrcMemory(