From 5d604a6b3050343efe5b62149ebcb06354e2b051 Mon Sep 17 00:00:00 2001
From: Wojciech Uss <wojciech.uss@intel.com>
Date: Wed, 27 Jan 2021 08:43:00 +0100
Subject: [PATCH] - Disabling oneDNN inplace pass (#30588) (#30710)

Co-authored-by: Jacek Czaja <jacek.czaja@intel.com>
---
 .../inference/api/paddle_pass_builder.cc      |  7 +++--
 .../operators/mkldnn/activation_mkldnn_op.cc  |  6 ++---
 .../fluid/operators/mkldnn/pool_mkldnn_op.cc  |  2 +-
 .../operators/mkldnn/softmax_mkldnn_op.cc     | 21 +++++++++------
 paddle/fluid/platform/mkldnn_reuse.h          | 27 ++++++++++---------
 5 files changed, 35 insertions(+), 28 deletions(-)
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 2e9b8e0d145..24c3645f990 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -224,12 +224,11 @@ void CpuPassStrategy::EnableMKLDNN() {
              // "fc_mkldnn_pass",
              // "fc_act_mkldnn_fuse_pass",
              "batch_norm_act_fuse_pass",
-#ifndef _WIN32
              // TODO(intel): Please fix the bug on windows.
              // https://github.com/PaddlePaddle/Paddle/issues/29710
-             "mkldnn_inplace_pass",  // This pass should be activated after
-                                     // fuses
-#endif
+             // "mkldnn_inplace_pass",  // This pass should be activated after
+             // all fuse passes. Disabled by default because it
+             // brings little gain and causes many problems.
          })) {
       passes_.push_back(pass);
     }
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 0971be6cfef..5c49e87730e 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -99,17 +99,17 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
                                       "5, or 6, but now the dimension size is",
                                       x->dims().size()));
 
+  bool is_inplaced = x->IsSharedBufferWith(*y);
   auto src_tz = framework::vectorize<int64_t>(x->dims());
 
   auto src_format = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format();
 
   platform::ActivationMKLDNNHandler<T> handler(
       src_tz, algorithm, alpha, beta, src_format, dev_ctx, ctx.GetPlace(),
-      ctx.InputName("X"));
+      ctx.InputName("X"), is_inplaced);
 
   auto src_memory_p = handler.AcquireSrcMemory(x);
-  auto dst_memory_p =
-      x->IsSharedBufferWith(*y) ? src_memory_p : handler.AcquireDstMemory(y);
+  auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y);
   auto activation_p = handler.AcquireForwardPrimitive();
 
   mkldnn::stream astream(dev_ctx.GetEngine());
diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
index 9488a1a4405..858e7831713 100644
--- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -127,7 +127,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
                   strides, ksize);
 
     platform::PoolingMKLDNNHandler<T>::ComputeAdaptivePoolParameters(
-        ctx, paddle::framework::vectorize(in_x->dims()), ksize, strides);
+        ctx, paddle::framework::vectorize(in_x->dims()), &ksize, &strides);
 
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
index 3eb2e7084a0..abe0a556536 100644
--- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -48,13 +48,17 @@ class SoftmaxMKLDNNHandler
                        const mkldnn::engine mkldnn_engine,
                        platform::Place cpu_place, const Tensor* input,
                        Tensor* output, const int axis,
-                       const std::string uniq_name)
+                       const std::string uniq_name, bool is_inplaced)
       : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
                                  mkldnn::softmax_backward>(
             dev_ctx, mkldnn_engine, cpu_place,
             // Softmax may be inplace then uniq_name is no longer unique
-            platform::CreateKey(dev_ctx, framework::vectorize(input->dims()),
-                                axis, uniq_name)) {
+            is_inplaced ? platform::CreateKey(
+                              dev_ctx, framework::vectorize(input->dims()),
+                              axis, uniq_name)
+                        : platform::CreateKey(
+                              dev_ctx, framework::vectorize(input->dims()),
+                              uniq_name)) {
     if (!this->isCached()) {
       PADDLE_ENFORCE_EQ(
           input->dims(), output->dims(),
@@ -78,7 +82,7 @@ class SoftmaxMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
                                  mkldnn::softmax_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dev_ctx, dims, axis, uniq_name)) {
+            platform::CreateKey(dev_ctx, dims, uniq_name)) {
     auto data_softmax_md =
         mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
     auto diff_softmax_md =
@@ -98,17 +102,18 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
 
     const Tensor* input = ctx.Input<Tensor>("X");
     Tensor* output = ctx.Output<Tensor>("Out");
+    bool is_inplaced = input->IsSharedBufferWith(*output);
 
     const int axis = CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());
 
     SoftmaxMKLDNNHandler<T> handler(dev_ctx, mkldnn_engine, ctx.GetPlace(),
-                                    input, output, axis, ctx.OutputName("Out"));
+                                    input, output, axis, ctx.OutputName("Out"),
+                                    is_inplaced);
 
     auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
     // For Inplace src and and dst are the same memory object
-    auto softmax_dst_memory_p = input->IsSharedBufferWith(*output)
-                                    ? softmax_src_memory_p
-                                    : handler.AcquireDstMemory(output);
+    auto softmax_dst_memory_p =
+        is_inplaced ? softmax_src_memory_p : handler.AcquireDstMemory(output);
 
     auto softmax_p = handler.AcquireForwardPrimitive();
 
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index e884d879ffa..42227da3ccb 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -601,12 +601,15 @@ class ActivationMKLDNNHandler
                           const MKLDNNMemoryFormat fmt,
                           const platform::MKLDNNDeviceContext& dev_ctx,
                           platform::Place cpu_place,
-                          const std::string& unique_name)
+                          const std::string& unique_name, bool is_inplaced)
 
       : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
                                  mkldnn::eltwise_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dev_ctx, dims, "a", algorithm, unique_name)) {
+            is_inplaced
+                ? platform::CreateKey(dev_ctx, dims, "a", algorithm,
+                                      unique_name)
+                : platform::CreateKey(dev_ctx, dims, "a", unique_name)) {
     auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
 
     this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training,
@@ -624,7 +627,7 @@ class ActivationMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
                                  mkldnn::eltwise_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dev_ctx, dims, "a", algorithm, unique_name)) {
+            platform::CreateKey(dev_ctx, dims, "a", unique_name)) {
     auto diff_dst_md = platform::MKLDNNMemDesc(
         dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
     auto src_md =
@@ -813,7 +816,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
                           mkldnn_paddings[1]);
       }
 
-      ComputeAdaptivePoolParameters(ctx, src_tz, ksize, strides);
+      ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides);
 
       this->AcquireForwardPrimitiveDescriptor(
           is_test ? mkldnn::prop_kind::forward_inference
@@ -883,22 +886,22 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
 
   static void ComputeAdaptivePoolParameters(
       const paddle::framework::ExecutionContext& ctx,
-      const std::vector<int64_t>& src_tz, std::vector<int64_t>& ksize,
-      std::vector<int64_t>& strides) {
+      const std::vector<int64_t>& src_tz, std::vector<int64_t>* ksize,
+      std::vector<int64_t>* strides) {
     if (ctx.Attr<bool>("adaptive")) {
       // (jczaja): oneDNN is supporting only unchangable in size pool window
       PADDLE_ENFORCE_EQ(
-          src_tz[src_tz.size() - 1] % ksize[1], 0,
+          src_tz[src_tz.size() - 1] % ksize->at(1), 0,
           platform::errors::Unimplemented(
               "Input dim must be divisible by corressponding ksize dim."));
       PADDLE_ENFORCE_EQ(
-          src_tz[src_tz.size() - 2] % ksize[0], 0,
+          src_tz[src_tz.size() - 2] % ksize->at(0), 0,
           platform::errors::Unimplemented(
               "Input dim must be divisible by corressponding ksize dim."));
-      ksize[0] = src_tz[src_tz.size() - 2] / ksize[0];
-      ksize[1] = src_tz[src_tz.size() - 1] / ksize[1];
-      strides[0] = ksize[0];
-      strides[1] = ksize[1];
+      ksize->at(0) = src_tz[src_tz.size() - 2] / ksize->at(0);
+      ksize->at(1) = src_tz[src_tz.size() - 1] / ksize->at(1);
+      strides->at(0) = ksize->at(0);
+      strides->at(1) = ksize->at(1);
     }
   }
 
-- 
GitLab