diff --git a/log b/log
new file mode 100644
index 0000000000000000000000000000000000000000..c02e10686b5fbcc78a8591519aaa3389dac63a56
Binary files /dev/null and b/log differ
diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 8708d90485af8fffab7a5c04d3c132e1ced82364..a42b686548c71b2fd372040d3d65345a9b1ca802 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -179,11 +179,9 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
 
   if ((in_format != out_format) || always_copy) {
     void* in_data = GetDataFromTensor(in, in_type);
-    std::string key =
-        platform::CreateKey(*dev_ctx, in_tz, in_format, out_format, in_type);
 
-    platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, *dev_ctx,
-                                           cpu_engine, key);
+    platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type,
+                                           cpu_engine);
 
     auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
     auto reorder_dst_memory_p =
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
index 6cea4bfb990df8df644c2eefdfe2c0a379114818..4f41ecf04cf433c7760be4708c17062350683d6e 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -43,10 +43,8 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
 
     auto tz = paddle::framework::vectorize(dout->dims());
     memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type());
-    std::string key = platform::CreateKey(dev_ctx, tz, dout->format(),
-                                          dout->format(), dout_type);
-    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, dev_ctx,
-                                           onednn_engine, key);
+    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type,
+                                           onednn_engine);
 
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     auto reorder_src_memory_p = handler.AcquireSrcMemory(
diff --git a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
index 9cfeace6bef99f98fcaa79dae5ba2ff1885092aa..6f79c2dccf56b2683cdb961cb287afb095454509 100644
--- a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
@@ -43,11 +43,9 @@ class CastMKLDNNKernel : public framework::OpKernel<T> {
 
     auto x_tz = framework::vectorize(x->dims());
 
-    std::string key =
-        platform::CreateKey(dev_ctx, x_tz, x->format(), x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_tz, x_paddle_type, x_type, out_paddle_type, out_type, dev_ctx,
-        dev_ctx.GetEngine(), key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_tz, x_paddle_type, x_type,
+                                                   out_paddle_type, out_type,
+                                                   dev_ctx.GetEngine());
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index b353ce4c322e40f339c0dca9d461105522591c13..09386fc31ee31b8ce2bfb3caf5aad053e2b2544c 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -1125,12 +1125,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
         mkldnn::memory::format_tag out_format =
             weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
                                    : mkldnn::memory::format_tag::goihw;
 
-        std::string key = platform::CreateKey(dev_ctx, weights_tz, filter_fmt,
-                                              out_format, in_type);
-        key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
-
-        platform::ReorderMKLDNNHandler handler(
-            weights_tz, filter->type(), in_type, dev_ctx, mkldnn_engine, key);
+        platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(),
+                                               in_type, mkldnn_engine);
         auto reorder_dst_memory_p =
             handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace());
diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
index 829c948c1a5d13baf9eea9d029a4fc7ffadde585..d537c3dbf9fddfc3f64d17ad2d39d5534a928108 100644
--- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
@@ -114,10 +114,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
     if (dout_vec_dims == dx_vec_dims) {
       mkldnn::memory::data_type dout_type =
           paddle::framework::ToMKLDNNDataType(dout->type());
-      std::string key = paddle::platform::CreateKey(
-          dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type);
       paddle::platform::ReorderMKLDNNHandler reorder_handler(
-          dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+          dout_vec_dims, dout->type(), dout_type, onednn_engine);
 
       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           dout->format(), paddle::platform::to_void_cast(dout->data<T>()));
diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
index 723c3c8352d545e3f9c7f5014237fd487d4c32b1..b78acd32e6dc8fffb6be48679164fae565a111bf 100644
--- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
@@ -58,11 +58,8 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx,
 
   memory::data_type input_type =
       paddle::framework::ToMKLDNNDataType(input->type());
-  std::string key = paddle::platform::CreateKey(
-      dev_ctx, input_dims, input->format(), input->format(), input_type);
   paddle::platform::ReorderMKLDNNHandler reorder_handler(
-      output_dims, input->type(), input_type, dev_ctx, dev_ctx.GetEngine(),
-      key);
+      output_dims, input->type(), input_type, dev_ctx.GetEngine());
 
   auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
       memory::format_tag::abc,
diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
index d5e428bd805fb3e63aad962b9ac6c6d9451b3cf7..e6a7f3e74fcc7a22274c1e19d1d65926a0360e7f 100644
--- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
@@ -93,10 +93,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel<T> {
     }
 
     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, x_vec_dims, x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
@@ -253,11 +251,8 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel<T> {
 
     mkldnn::memory::data_type dout_type =
         framework::ToMKLDNNDataType(dout->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, dout_vec_dims, this->getPlainFormatTag(dx),
-                            dx->format(), dout_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
+                                                   dout_type, onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         dout->format(), platform::to_void_cast(dout->data<T>()));
diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
index facf5ca4b8397f3d499dfa751157fafd26a388ff..6bc3413604e221b3a9af16d131e4858fc0746510 100644
--- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
@@ -98,18 +98,16 @@ class SliceMKLDNNKernel : public framework::OpKernel<T> {
     out->Resize(framework::make_ddim(slice_dims));
 
     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axes, starts, ends,
-                                   x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
                                                         reorder_src_memory_p);
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        out, slice_dims, 0, get_plain_format_tag(x), ctx.GetPlace());
+        out, slice_dims, get_plain_format_tag(x), ctx.GetPlace());
 
     auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p,
                                                     slice_mem_p);
@@ -201,16 +199,13 @@ class SliceGradMKLDNNKernel : public framework::OpKernel<T> {
 
     mkldnn::memory::format_tag reorder_format_tag =
         platform::GetMKLDNNFormat(md.reshape(slice_dims));
-    auto key = platform::CreateKey(dev_ctx, dout_vec_dims, axes, starts, ends,
-                                   reorder_format_tag, dout_type);
-
-    platform::ReorderMKLDNNHandler reorder_handler(
-        slice_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(),
+                                                   dout_type, onednn_engine);
 
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         reorder_format_tag, platform::to_void_cast(dout->data<T>()));
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        dx, dx_vec_dims, 0, reorder_format_tag, ctx.GetPlace());
+        dx, dx_vec_dims, reorder_format_tag, ctx.GetPlace());
     memset(dx->data<T>(), 0, reorder_dst_memory_p->get_desc().get_size());
 
     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
index 8a58d9f26f87bcc1e22356239346f725bfac6083..411f33276c361c24bbf89dae7365bd9953ceed9d 100644
--- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
@@ -91,27 +91,25 @@ class SplitMKLDNNKernel : public framework::OpKernel<T> {
     auto x_vec_dims = framework::vectorize(x_dims);
 
     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axis, num, sections,
-                                   x->format(), x_type);
 
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
 
     std::vector<int64_t> offset(x_vec_dims.size(), 0);
 
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
 
     for (size_t i = 0; i < outs_number; ++i) {
       auto out_vec_dims = framework::vectorize(outs[i]->dims());
-      auto slice_mem_p = reorder_handler.AcquireSubmemory(
-          out_vec_dims, offset, reorder_src_memory_p, i);
+      auto slice_mem_p = reorder_handler.AcquireSubmemory(out_vec_dims, offset,
+                                                          reorder_src_memory_p);
 
       auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-          outs[i], out_vec_dims, i, x->format(), ctx.GetPlace());
+          outs[i], out_vec_dims, x->format(), ctx.GetPlace());
       auto reorder_p =
-          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p, i);
+          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);
 
       reorder_p->execute(astream, *slice_mem_p, *reorder_dst_memory_p);
diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
index 4cc9f53b9b6b2227691b3c6237927638b401f029..8208a484b4a32eae4d384b8464731ee9032762cc 100644
--- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -155,15 +155,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     // For in-place execution which sum does not have we need to fake it
     // so from oneDNN dst memory we reorder data into input
     if (in_place) {
-      const std::string reorder_key =
-          platform::CreateKey(dev_ctx, framework::vectorize(output->dims()),
-                              ctx.OutputName("Out") + "-I");
-
       auto& in_out = in_vars[0]->Get<framework::LoDTensor>();
       auto output_tz = framework::vectorize(output->dims());
       platform::ReorderMKLDNNHandler reorder_handler(
           output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()),
-          dev_ctx, dev_ctx.GetEngine(), reorder_key);
+          dev_ctx.GetEngine());
 
       auto target_mem = reorder_handler.AcquireDstMemory(
           output, in_out.format(), ctx.GetPlace());
diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
index 0165cfd8b808994d6232ddc0ea5ecfedc8de14b1..17801454da2fa2cbbea9b9ddce1eeb1e6263cf6c 100644
--- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
+++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
@@ -71,10 +71,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel<T> {
     if (input_dims == output_dims) {
       mkldnn::memory::data_type input_type =
          framework::ToMKLDNNDataType(input->type());
-      std::string key = platform::CreateKey(
-          dev_ctx, input_dims, input->format(), input->format(), input_type);
-      platform::ReorderMKLDNNHandler reorder_handler(
-          input_dims, input->type(), input_type, dev_ctx, onednn_engine, key);
+      platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(),
+                                                     input_type, onednn_engine);
 
       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           input->format(), platform::to_void_cast(input->data<T>()));
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 49160f9463240d848baddebe46a7ea02e31946e7..29a3f8e9dcd3cd628972c0ac77dfedfc85ed9854 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -1071,138 +1071,73 @@ class ActivationMKLDNNHandler
   }
 };
 
-class ReorderMKLDNNHandler : public MKLDNNHandler {
+class ReorderMKLDNNHandler {
 public:
   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
-                       mkldnn::memory::data_type dtype,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::memory::data_type dtype, mkldnn::engine engine)
+      : dims_(dims),
        vtype_(vtype),
        vtype_dst_(vtype),
        dtype_(dtype),
-        dtype_dst_(dtype) {}
+        dtype_dst_(dtype),
+        engine_(engine) {}
 
   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
                        mkldnn::memory::data_type dtype,
                        framework::proto::VarType::Type vtype_dst,
                        mkldnn::memory::data_type dtype_dst,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::engine engine)
+      : dims_(dims),
        vtype_(vtype),
        vtype_dst_(vtype_dst),
        dtype_(dtype),
-        dtype_dst_(dtype_dst) {}
+        dtype_dst_(dtype_dst),
+        engine_(engine) {}
 
   std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
       const MKLDNNMemoryFormat& fmt, void* ptr) {
-    return this->AcquireMemory(dims_, dtype_, fmt, ptr, "@user_src_mem_p");
+    auto md = mkldnn::memory::desc(dims_, dtype_, fmt);
+    return std::make_shared<mkldnn::memory>(md, engine_, ptr);
  }
 
   std::shared_ptr<mkldnn::memory> AcquireSubmemory(
       const std::vector<int64_t>& dims, const std::vector<int64_t>& offset,
-      const std::shared_ptr<mkldnn::memory>& mem_p, int submemory_number = 0) {
-    std::string local_key = key_;
-    local_key.append("@submem")
-        .append(std::to_string(submemory_number))
-        .append("_p");
-
-    auto sub_mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (sub_mem_p == nullptr) {
-      auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
-      sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
-                                                   mem_p->get_data_handle());
-      dev_ctx_.SetBlob(local_key, sub_mem_p);
-    } else {
-      sub_mem_p->set_data_handle(mem_p->get_data_handle());
-    }
+      const std::shared_ptr<mkldnn::memory>& mem_p) {
+    auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
+    auto sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
+                                                      mem_p->get_data_handle());
    return sub_mem_p;
  }
 
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const MKLDNNMemoryFormat& fmt,
       platform::Place place) {
-    auto local_key = key_ + "@user_dst_mem_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
+    auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
  }
 
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const std::vector<int64_t>& dims,
-      const int memory_number, const MKLDNNMemoryFormat& fmt,
-      platform::Place place) {
-    auto local_key =
-        key_ + "@user_dst_mem" + std::to_string(memory_number) + "_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
-  }
-
-  std::shared_ptr<mkldnn::reorder> AcquireReorder(
-      std::shared_ptr<mkldnn::memory> dst_memory_p,
-      std::shared_ptr<mkldnn::memory> src_memory_p, int reorder_number) {
-    auto prim_key = key_ + "@reorder" + std::to_string(reorder_number) + "_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+      const MKLDNNMemoryFormat& fmt, platform::Place place) {
+    auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
  }
 
   std::shared_ptr<mkldnn::reorder> AcquireReorder(
       std::shared_ptr<mkldnn::memory> dst_memory_p,
       std::shared_ptr<mkldnn::memory> src_memory_p) {
-    auto prim_key = key_ + "@reorder_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+    return std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
  }
 
 private:
  std::vector<int64_t> dims_;
  framework::proto::VarType::Type vtype_, vtype_dst_;
  mkldnn::memory::data_type dtype_, dtype_dst_;
+  mkldnn::engine engine_;
};
 
 template <typename T>