From 50326563d5c9c6925f7ecb12e3c5a346d26f0fa0 Mon Sep 17 00:00:00 2001
From: Leo Zhao <48052473+LeoZhao-Intel@users.noreply.github.com>
Date: Tue, 4 Jun 2019 21:29:30 +0800
Subject: [PATCH] enable mkldnn primitive reuse for platform reorder (#17826)

test=develop
---
 .../fluid/framework/data_layout_transform.cc  | 22 +++--
 paddle/fluid/platform/mkldnn_reuse.h          | 87 +++++++++++++++++++
 2 files changed, 102 insertions(+), 7 deletions(-)
diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 72c50518af0..bbcd34260e3 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -13,11 +13,13 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/data_layout_transform.h"
+#include <string>
 #include <vector>
 
 #include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
 #endif
 
 namespace paddle {
@@ -145,7 +147,6 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
   memory::data_type in_type = ToMKLDNNDataType(in.type());
   PADDLE_ENFORCE(in_type != memory::data_type::data_undef,
                  "Input tensor type is not supported: %s", in.type());
-  memory::data_type out_type = in_type;
 
   auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
   auto out_format =
@@ -156,14 +157,21 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
 
   if (in_format != out_format) {
     void* in_data = GetDataFromTensor(in, in_type);
-    auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
+    const std::string key = platform::ReorderMKLDNNHandler::GetHash(
+        in_tz, in_format, out_format, std::to_string(in_type));
 
-    auto in_memory =
-        memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
-    auto out_memory =
-        memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
+    platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, *dev_ctx,
+                                           cpu_engine, key);
 
-    platform::Reorder(in_memory, out_memory);
+    auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
+    auto reorder_dst_memory_p =
+        handler.AcquireDstMemory(out, out_format, expected_kernel_type.place_);
+    auto reorder_p =
+        handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
+
+    std::vector<mkldnn::primitive> pipeline;
+    pipeline.push_back(*reorder_p);
+    mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
   } else {
     out->ShareDataWith(in);
   }
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 4011f08cea8..87405fb5507 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -400,6 +400,112 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
   std::vector<int> logical_axis_;
 };
 
+// Caches and reuses the mkldnn memory objects and reorder primitive needed
+// for a reorder between two layouts. Each object is stored in the device
+// context under key_ plus a fixed suffix; on a cache hit the stored object is
+// returned (memory objects get their data handle updated) instead of rebuilt.
+class ReorderMKLDNNHandler : public MKLDNNHandler {
+ public:
+  // dims is copied into the handler; vtype is the framework element type
+  // used to allocate the destination tensor; dtype is the mkldnn data type
+  // used for both the source and destination memory descriptors.
+  ReorderMKLDNNHandler(std::vector<int>& dims,  // NOLINT
+                       framework::proto::VarType::Type vtype,
+                       mkldnn::memory::data_type dtype,
+                       const platform::MKLDNNDeviceContext& dev_ctx,
+                       mkldnn::engine engine, const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
+        dims_(dims),
+        vtype_(vtype),
+        dtype_(dtype) {}
+
+  // Returns the source memory object with layout fmt backed by ptr. A cached
+  // object is re-pointed at ptr; otherwise a new one is created and cached.
+  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
+      const mkldnn::memory::format& fmt, void* ptr) {
+    auto local_key = key_ + "@user_src_mem_p";
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    // A miss is tolerated only while is_reusing_ is false (set on cache hits).
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      auto src_md = platform::MKLDNNMemDesc(dims_, dtype_, fmt);
+      mem_p = std::make_shared<mkldnn::memory>(
+          mkldnn::memory::primitive_desc{src_md, engine_}, ptr);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      mem_p->set_data_handle(ptr);
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  // Returns the destination memory object with layout fmt. The buffer comes
+  // from output->mutable_data(place, vtype_) on every call, so a cached
+  // object is re-pointed at the current output buffer.
+  std::shared_ptr<mkldnn::memory> AcquireDstMemory(
+      framework::Tensor* output, const mkldnn::memory::format& fmt,
+      platform::Place place) {
+    auto local_key = key_ + "@user_dst_mem_p";
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    // A miss is tolerated only while is_reusing_ is false (set on cache hits).
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_, fmt);
+      auto dst_mdp = mkldnn::memory::primitive_desc{dst_md, engine_};
+
+      auto dst_data = output->mutable_data(place, vtype_);
+
+      mem_p = std::make_shared<mkldnn::memory>(dst_mdp, dst_data);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      auto dst_data = output->mutable_data(place, vtype_);
+      mem_p->set_data_handle(dst_data);
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  // Returns the reorder primitive from src_memory_p to dst_memory_p, creating
+  // and caching it on first use. Note the argument order: destination first.
+  std::shared_ptr<mkldnn::reorder> AcquireReorder(
+      std::shared_ptr<mkldnn::memory> dst_memory_p,
+      std::shared_ptr<mkldnn::memory> src_memory_p) {
+    auto prim_key = key_ + "@reorder_p";
+    auto reorder_p =
+        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
+    // A miss is tolerated only while is_reusing_ is false (set on cache hits).
+    PADDLE_ENFORCE((reorder_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find reorder primitive in device context");
+    if (reorder_p == nullptr) {
+      reorder_p =
+          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
+      dev_ctx_.SetBlob(prim_key, reorder_p);
+    } else {
+      is_reusing_ = true;
+    }
+    return reorder_p;
+  }
+
+  // Builds the cache key from the shape, both layouts and a caller-supplied
+  // suffix: dims2str(shape) + "<in_fmt>-><out_fmt>#" + suffix.
+  static std::string GetHash(std::vector<int>& shape,  // NOLINT
+                             mkldnn::memory::format in_fmt,
+                             mkldnn::memory::format out_fmt,
+                             const std::string& suffix) {
+    return dims2str(shape) + std::to_string(in_fmt) + "->" +
+           std::to_string(out_fmt) + "#" + suffix;
+  }
+
+ private:
+  std::vector<int> dims_;
+  framework::proto::VarType::Type vtype_;
+  mkldnn::memory::data_type dtype_;
+};
+
 template <typename T>
 struct convolutional_algorithm;
 
-- 
GitLab