From e4c2a8544ae17e3b58a8054184e05e4e2eb5ff4d Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 18 Sep 2021 04:10:45 +0200
Subject: [PATCH] [oneDNN] Disable caching of Reorder operation (#35664)

* - Disable caching of Reorder
* - compilation fix
* - another compilation fix
* - another compilation fix
* - compilation fix
* - Fix
* - yet another compilation fix
* - surprisingly, another compilation fix
* - lint
* - fix after review
* - fix
---
 log                                           | Bin 0 -> 2816 bytes
 .../fluid/framework/data_layout_transform.cc  |   6 +-
 .../mkldnn/elementwise_add_mkldnn_op.cc       |   6 +-
 .../fluid/operators/mkldnn/cast_mkldnn_op.cc  |   8 +-
 .../fluid/operators/mkldnn/conv_mkldnn_op.cc  |   8 +-
 .../operators/mkldnn/expand_v2_mkldnn_op.cc   |   4 +-
 .../operators/mkldnn/matmul_mkldnn_op.cc      |   5 +-
 .../operators/mkldnn/reshape_mkldnn_op.cc     |  13 +-
 .../fluid/operators/mkldnn/slice_mkldnn_op.cc |  17 +--
 .../fluid/operators/mkldnn/split_mkldnn_op.cc |  14 +--
 .../fluid/operators/mkldnn/sum_mkldnn_op.cc   |   6 +-
 .../reduce_ops/mkldnn/reduce_mkldnn_op.h      |   6 +-
 paddle/fluid/platform/mkldnn_reuse.h          | 113 ++++-------------
 13 files changed, 54 insertions(+), 152 deletions(-)
 create mode 100644 log

diff --git a/log b/log
new file mode 100644
index 0000000000000000000000000000000000000000..c02e10686b5fbcc78a8591519aaa3389dac63a56
GIT binary patch
literal 2816
[base85 binary data omitted; the data_layout_transform.cc hunk and the
elementwise_add_mkldnn_op.cc diff header are garbled here, only the hunk
body below survives]

     auto tz = paddle::framework::vectorize(dout->dims());
     memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type());
-    std::string key = platform::CreateKey(dev_ctx, tz, dout->format(),
-                                          dout->format(), dout_type);
-    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, dev_ctx,
-                                           onednn_engine, key);
+    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type,
+                                           onednn_engine);

     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     auto reorder_src_memory_p = handler.AcquireSrcMemory(

diff --git a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
index 9cfeace6bef..6f79c2dccf5 100644
--- a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
@@ -43,11 +43,9 @@ class CastMKLDNNKernel : public framework::OpKernel {

     auto x_tz = framework::vectorize(x->dims());

-    std::string key =
-        platform::CreateKey(dev_ctx, x_tz, x->format(), x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_tz, x_paddle_type, x_type, out_paddle_type, out_type, dev_ctx,
-        dev_ctx.GetEngine(), key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_tz, x_paddle_type, x_type,
+                                                   out_paddle_type, out_type,
+                                                   dev_ctx.GetEngine());

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
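The cast kernel above drives the type conversion entirely through ReorderMKLDNNHandler: a oneDNN reorder can change the data type while it copies. The following standalone sketch illustrates that idea with the raw oneDNN C++ API. It is not part of the patch; the function name and shapes are made up, and it assumes a CPU build whose reorder implementation supports f32 to bf16 conversion.

#include <vector>

#include "mkldnn.hpp"  // oneDNN C++ API header used by this codebase

// Hypothetical helper, not Paddle code: cast an f32 buffer to bf16 by running
// a single reorder primitive, the same mechanism CastMKLDNNKernel relies on.
void CastF32ToBf16Sketch() {
  mkldnn::engine engine(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream astream(engine);

  mkldnn::memory::dims dims = {1, 3, 8, 8};
  std::vector<float> src_buf(1 * 3 * 8 * 8, 1.0f);

  auto src_md = mkldnn::memory::desc(dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  auto dst_md = mkldnn::memory::desc(dims, mkldnn::memory::data_type::bf16,
                                     mkldnn::memory::format_tag::nchw);

  // Source wraps the caller-owned buffer; destination lets oneDNN allocate.
  mkldnn::memory src_mem(src_md, engine, src_buf.data());
  mkldnn::memory dst_mem(dst_md, engine);

  // The reorder performs the f32 -> bf16 conversion as it copies.
  mkldnn::reorder(src_mem, dst_mem).execute(astream, src_mem, dst_mem);
  astream.wait();
}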
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index b353ce4c322..09386fc31ee 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -1125,12 +1125,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel {

           mkldnn::memory::format_tag out_format =
               weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
                                      : mkldnn::memory::format_tag::goihw;
-          std::string key = platform::CreateKey(dev_ctx, weights_tz, filter_fmt,
-                                                out_format, in_type);
-          key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
-
-          platform::ReorderMKLDNNHandler handler(
-              weights_tz, filter->type(), in_type, dev_ctx, mkldnn_engine, key);
+          platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(),
+                                                 in_type, mkldnn_engine);
           auto reorder_dst_memory_p =
               handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace());

diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
index 829c948c1a5..d537c3dbf9f 100644
--- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
@@ -114,10 +114,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel {
     if (dout_vec_dims == dx_vec_dims) {
       mkldnn::memory::data_type dout_type =
           paddle::framework::ToMKLDNNDataType(dout->type());
-      std::string key = paddle::platform::CreateKey(
-          dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type);
       paddle::platform::ReorderMKLDNNHandler reorder_handler(
-          dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+          dout_vec_dims, dout->type(), dout_type, onednn_engine);

       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           dout->format(), paddle::platform::to_void_cast(dout->data<T>()));

diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
index 723c3c8352d..b78acd32e6d 100644
--- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
@@ -58,11 +58,8 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx,

   memory::data_type input_type =
       paddle::framework::ToMKLDNNDataType(input->type());
-  std::string key = paddle::platform::CreateKey(
-      dev_ctx, input_dims, input->format(), input->format(), input_type);
   paddle::platform::ReorderMKLDNNHandler reorder_handler(
-      output_dims, input->type(), input_type, dev_ctx, dev_ctx.GetEngine(),
-      key);
+      output_dims, input->type(), input_type, dev_ctx.GetEngine());

   auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
       memory::format_tag::abc,

diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
index d5e428bd805..e6a7f3e74fc 100644
--- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
@@ -93,10 +93,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel {
     }

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, x_vec_dims, x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
@@ -253,11 +251,8 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel {

     mkldnn::memory::data_type dout_type =
         framework::ToMKLDNNDataType(dout->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, dout_vec_dims, this->getPlainFormatTag(dx),
-                            dx->format(), dout_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
+                                                   dout_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         dout->format(), platform::to_void_cast(dout->data<T>()));
diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
index facf5ca4b83..6bc3413604e 100644
--- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
@@ -98,18 +98,16 @@ class SliceMKLDNNKernel : public framework::OpKernel {

     out->Resize(framework::make_ddim(slice_dims));

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axes, starts, ends,
-                                   x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
                                                         reorder_src_memory_p);
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        out, slice_dims, 0, get_plain_format_tag(x), ctx.GetPlace());
+        out, slice_dims, get_plain_format_tag(x), ctx.GetPlace());

     auto reorder_p =
         reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);
@@ -201,16 +199,13 @@ class SliceGradMKLDNNKernel : public framework::OpKernel {

     mkldnn::memory::format_tag reorder_format_tag =
         platform::GetMKLDNNFormat(md.reshape(slice_dims));

-    auto key = platform::CreateKey(dev_ctx, dout_vec_dims, axes, starts, ends,
-                                   reorder_format_tag, dout_type);
-
-    platform::ReorderMKLDNNHandler reorder_handler(
-        slice_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(),
+                                                   dout_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         reorder_format_tag, platform::to_void_cast(dout->data<T>()));
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        dx, dx_vec_dims, 0, reorder_format_tag, ctx.GetPlace());
+        dx, dx_vec_dims, reorder_format_tag, ctx.GetPlace());
     memset(dx->data<T>(), 0, reorder_dst_memory_p->get_desc().get_size());

     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
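For reference, the slice kernel's AcquireSubmemory call above boils down to a plain oneDNN sub-memory descriptor plus a reorder: the sub-memory is a zero-copy view into the source buffer, and the reorder materialises just that region into the output. The following standalone sketch is not part of the patch; names and shapes are illustrative and only the mkldnn:: calls are real API.

#include <vector>

#include "mkldnn.hpp"  // oneDNN C++ API header used by this codebase

// Illustrative only: copy rows 2..5 of an 8-row NCHW tensor into a compact
// destination, mirroring what SliceMKLDNNKernel does through the handler.
void SliceViaSubmemorySketch() {
  mkldnn::engine engine(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream astream(engine);

  mkldnn::memory::dims src_dims = {2, 4, 8, 8};
  mkldnn::memory::dims slice_dims = {2, 4, 4, 8};  // 4 of the 8 rows
  mkldnn::memory::dims offsets = {0, 0, 2, 0};     // starting at row 2

  auto src_md = mkldnn::memory::desc(src_dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  std::vector<float> src_buf(2 * 4 * 8 * 8, 1.0f);
  mkldnn::memory src_mem(src_md, engine, src_buf.data());

  // Zero-copy view of the requested region, still backed by src_mem's buffer.
  auto sub_md = src_md.submemory_desc(slice_dims, offsets);
  mkldnn::memory sub_mem(sub_md, engine, src_mem.get_data_handle());

  // Compact destination with the slice's own layout; the reorder does the copy.
  auto dst_md = mkldnn::memory::desc(slice_dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  mkldnn::memory dst_mem(dst_md, engine);

  mkldnn::reorder(sub_mem, dst_mem).execute(astream, sub_mem, dst_mem);
  astream.wait();
}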
diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
index 8a58d9f26f8..411f33276c3 100644
--- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
@@ -91,27 +91,25 @@ class SplitMKLDNNKernel : public framework::OpKernel {

     auto x_vec_dims = framework::vectorize(x_dims);

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axis, num, sections,
-                                   x->format(), x_type);

     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

     std::vector<int64_t> offset(x_vec_dims.size(), 0);

-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));

     for (size_t i = 0; i < outs_number; ++i) {
       auto out_vec_dims = framework::vectorize(outs[i]->dims());
-      auto slice_mem_p = reorder_handler.AcquireSubmemory(
-          out_vec_dims, offset, reorder_src_memory_p, i);
+      auto slice_mem_p = reorder_handler.AcquireSubmemory(out_vec_dims, offset,
+                                                          reorder_src_memory_p);

       auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-          outs[i], out_vec_dims, i, x->format(), ctx.GetPlace());
+          outs[i], out_vec_dims, x->format(), ctx.GetPlace());
       auto reorder_p =
-          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p, i);
+          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);

       reorder_p->execute(astream, *slice_mem_p, *reorder_dst_memory_p);

diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
index 4cc9f53b9b6..8208a484b4a 100644
--- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -155,15 +155,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel {
     // For in-place execution which sum does not have we need to fake it
     // so from oneDNN dst memory we reorder data into input
     if (in_place) {
-      const std::string reorder_key =
-          platform::CreateKey(dev_ctx, framework::vectorize(output->dims()),
-                              ctx.OutputName("Out") + "-I");
-
       auto& in_out = in_vars[0]->Get<framework::LoDTensor>();
       auto output_tz = framework::vectorize(output->dims());
       platform::ReorderMKLDNNHandler reorder_handler(
           output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()),
-          dev_ctx, dev_ctx.GetEngine(), reorder_key);
+          dev_ctx.GetEngine());

       auto target_mem = reorder_handler.AcquireDstMemory(
           output, in_out.format(), ctx.GetPlace());

diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
index 0165cfd8b80..17801454da2 100644
--- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
+++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
@@ -71,10 +71,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel {
     if (input_dims == output_dims) {
       mkldnn::memory::data_type input_type =
           framework::ToMKLDNNDataType(input->type());
-      std::string key = platform::CreateKey(
-          dev_ctx, input_dims, input->format(), input->format(), input_type);
-      platform::ReorderMKLDNNHandler reorder_handler(
-          input_dims, input->type(), input_type, dev_ctx, onednn_engine, key);
+      platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(),
+                                                     input_type, onednn_engine);

       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           input->format(), platform::to_void_cast(input->data<T>()));
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 49160f94632..29a3f8e9dcd 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -1071,138 +1071,73 @@ class ActivationMKLDNNHandler
   }
 };

-class ReorderMKLDNNHandler : public MKLDNNHandler {
+class ReorderMKLDNNHandler {
  public:
   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
-                       mkldnn::memory::data_type dtype,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::memory::data_type dtype, mkldnn::engine engine)
+      : dims_(dims),
         vtype_(vtype),
         vtype_dst_(vtype),
         dtype_(dtype),
-        dtype_dst_(dtype) {}
+        dtype_dst_(dtype),
+        engine_(engine) {}

   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
                        mkldnn::memory::data_type dtype,
                        framework::proto::VarType::Type vtype_dst,
                        mkldnn::memory::data_type dtype_dst,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::engine engine)
+      : dims_(dims),
         vtype_(vtype),
         vtype_dst_(vtype_dst),
         dtype_(dtype),
-        dtype_dst_(dtype_dst) {}
+        dtype_dst_(dtype_dst),
+        engine_(engine) {}

   std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
       const MKLDNNMemoryFormat& fmt, void* ptr) {
-    return this->AcquireMemory(dims_, dtype_, fmt, ptr, "@user_src_mem_p");
+    auto md = mkldnn::memory::desc(dims_, dtype_, fmt);
+    return std::make_shared<mkldnn::memory>(md, engine_, ptr);
   }

   std::shared_ptr<mkldnn::memory> AcquireSubmemory(
       const std::vector<int64_t>& dims, const std::vector<int64_t>& offset,
-      const std::shared_ptr<mkldnn::memory>& mem_p, int submemory_number = 0) {
-    std::string local_key = key_;
-    local_key.append("@submem")
-        .append(std::to_string(submemory_number))
-        .append("_p");
-
-    auto sub_mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (sub_mem_p == nullptr) {
-      auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
-      sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
-                                                   mem_p->get_data_handle());
-      dev_ctx_.SetBlob(local_key, sub_mem_p);
-    } else {
-      sub_mem_p->set_data_handle(mem_p->get_data_handle());
-    }
+      const std::shared_ptr<mkldnn::memory>& mem_p) {
+    auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
+    auto sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
+                                                      mem_p->get_data_handle());
     return sub_mem_p;
   }

   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const MKLDNNMemoryFormat& fmt,
       platform::Place place) {
-    auto local_key = key_ + "@user_dst_mem_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
+    auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
   }

   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const std::vector<int64_t>& dims,
-      const int memory_number, const MKLDNNMemoryFormat& fmt,
-      platform::Place place) {
-    auto local_key =
-        key_ + "@user_dst_mem" + std::to_string(memory_number) + "_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
-  }
-
-  std::shared_ptr<mkldnn::reorder> AcquireReorder(
-      std::shared_ptr<mkldnn::memory> dst_memory_p,
-      std::shared_ptr<mkldnn::memory> src_memory_p, int reorder_number) {
-    auto prim_key = key_ + "@reorder" + std::to_string(reorder_number) + "_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+      const MKLDNNMemoryFormat& fmt, platform::Place place) {
+    auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
   }

   std::shared_ptr<mkldnn::reorder> AcquireReorder(
       std::shared_ptr<mkldnn::memory> dst_memory_p,
       std::shared_ptr<mkldnn::memory> src_memory_p) {
-    auto prim_key = key_ + "@reorder_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+    return std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
   }

  private:
  std::vector<int64_t> dims_;
  framework::proto::VarType::Type vtype_, vtype_dst_;
  mkldnn::memory::data_type dtype_, dtype_dst_;
+  mkldnn::engine engine_;
 };

 template
-- 
GitLab
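For orientation, here is the call sequence that the operator kernels above now share, collected in one place. This is an illustrative sketch, not part of the patch: it assumes the usual paddle::operators namespace and kernel-local objects (x, out, ctx, onednn_engine), and the data<T>() access follows the conventional Paddle pattern rather than something shown verbatim in this diff.

#include "paddle/fluid/platform/mkldnn_reuse.h"

// Sketch of a kernel body built on the slimmed-down ReorderMKLDNNHandler.
// No dev_ctx and no cache key: the handler only needs dims, the Paddle and
// oneDNN data types and the engine, and creates fresh memory objects on
// every call.
template <typename T>
void RunReorderSketch(const framework::Tensor* x, framework::Tensor* out,
                      const framework::ExecutionContext& ctx,
                      const mkldnn::engine& onednn_engine) {
  auto x_tz = framework::vectorize(x->dims());
  mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());

  platform::ReorderMKLDNNHandler reorder_handler(x_tz, x->type(), x_type,
                                                 onednn_engine);

  auto src_mem_p = reorder_handler.AcquireSrcMemory(
      x->format(), platform::to_void_cast(x->data<T>()));
  auto dst_mem_p =
      reorder_handler.AcquireDstMemory(out, x->format(), ctx.GetPlace());
  auto reorder_p = reorder_handler.AcquireReorder(dst_mem_p, src_mem_p);

  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
  reorder_p->execute(astream, *src_mem_p, *dst_mem_p);
  astream.wait();
}

AcquireReorder now simply constructs an mkldnn::reorder from the two memories, so the primitive lives only for the duration of the kernel call instead of being stored in the device context's blob map.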