From b0d580a217f293a31b5f09dbce6eca9e126c512f Mon Sep 17 00:00:00 2001
From: sneaxiy <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 1 Dec 2021 21:25:22 +0800
Subject: [PATCH] Fix inplace addto pass by setting dtype correctly (#37717)

* fix inplace addto pass

* update

* fix ut

* improve ci coverage

* fix musl ci compile error
---
 .../details/share_tensor_buffer_functor.cc    | 10 +--
 .../details/share_tensor_buffer_functor.h     |  8 ++-
 .../details/share_tensor_buffer_op_handle.cc  |  9 +--
 .../details/share_tensor_buffer_op_handle.h   |  2 +-
 .../buffer_shared_inplace_op_pass.cc          |  3 +-
 .../inplace_addto_op_pass.cc                  |  4 +-
 .../memory_optimize_pass/memory_reuse_pass.cc |  6 +-
 paddle/fluid/framework/tensor.h               |  2 +
 paddle/fluid/operators/CMakeLists.txt         |  1 +
 paddle/fluid/operators/share_buffer_op.cc     |  3 +-
 paddle/fluid/operators/share_buffer_op.h      | 16 +++--
 .../fluid/operators/share_buffer_op_test.cc   | 71 +++++++++++++++++++
 .../unittests/test_apply_pass_to_program.py   |  2 +-
 13 files changed, 110 insertions(+), 27 deletions(-)
 create mode 100644 paddle/fluid/operators/share_buffer_op_test.cc

diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
index ccc64a9cdc3..1225e2ee025 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
+++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
@@ -39,14 +39,14 @@ ShareTensorBufferFunctor::ShareTensorBufferFunctor(
     Scope *scope, size_t scope_idx, const std::string &op_type,
     const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
     const std::vector<std::string> &out_var_names, const bool &is_variant_scope,
-    bool share_dims)
+    bool share_dims_and_dtype)
     : scope_(scope),
       scope_idx_(scope_idx),
       op_type_(op_type),
       in_var_infos_(in_var_infos),
       out_var_names_(out_var_names),
       is_variant_scope_(is_variant_scope),
-      share_dims_(share_dims) {
+      share_dims_and_dtype_(share_dims_and_dtype) {
   PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size(),
                     platform::errors::PreconditionNotMet(
                         "The number of input variables and output variables "
@@ -147,12 +147,14 @@ void ShareTensorBufferFunctor::operator()(Scope *exec_scope) {
       // NOTE(zhiqiu): In the case of inplace addto, if the operator of
       // the in_out_vars is skipped during running, we should set the dims of
       // output as the same as input.
-      if (share_dims_) {
+      if (share_dims_and_dtype_) {
         out_tensor->Resize(in_tensor.dims());
+        out_tensor->ShareDataTypeWith(in_tensor);
       }
 
       VLOG(2) << "Share tensor buffer when running " << op_type_ << " : "
-              << in_var_info->Name() << " -> " << out_var_names_[i];
+              << in_var_info->Name() << " -> " << out_var_names_[i]
+              << " share_dims_and_dtype = " << share_dims_and_dtype_;
     }
   }
 }
diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h
index 528b047bccc..f0ddb3f0137 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h
+++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h
@@ -73,12 +73,14 @@ class ShareTensorBufferFunctor {
       Scope *scope, size_t scope_idx, const std::string &op_type,
       const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
       const std::vector<std::string> &out_var_names,
-      const bool &is_variant_scope, bool share_dims = false);
+      const bool &is_variant_scope, bool share_dims_and_dtype = false);
 
   void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info,
                        const std::string &out_var_name);
 
-  void SetShareDims(bool share_dims) { share_dims_ = share_dims; }
+  void SetShareDimsAndDtype(bool share_dims_and_dtype) {
+    share_dims_and_dtype_ = share_dims_and_dtype;
+  }
 
   void operator()(Scope *exec_scope);
 
@@ -108,7 +110,7 @@ class ShareTensorBufferFunctor {
   // NOTE(zhiqiu): In the case of inplace addto, if the operator of
   // the in_out_vars is skipped during running, we should set the dims of output
   // as the same as input.
-  bool share_dims_{false};
+  bool share_dims_and_dtype_{false};
 };
 
 }  // namespace details
diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc
index 7e10c669ac4..aa942415fb4 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc
+++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc
@@ -64,10 +64,10 @@ ComputationOpHandle *GetUniquePendingComputationOpHandle(
 ShareTensorBufferOpHandle::ShareTensorBufferOpHandle(
     ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type,
     const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
-    const std::vector<std::string> &out_var_names, bool share_dims)
+    const std::vector<std::string> &out_var_names, bool share_dims_and_dtype)
     : OpHandleBase(node),
       functor_(scope, scope_idx, op_type, in_var_infos, out_var_names,
-               is_variant_scope_, share_dims) {}
+               is_variant_scope_, share_dims_and_dtype) {}
 
 std::unordered_map<std::string, std::string>
 ShareTensorBufferOpHandle::ReusedVars() const {
@@ -79,8 +79,9 @@ void ShareTensorBufferOpHandle::AddReuseVarPair(
   functor_.AddReuseVarPair(in_var_info, out_var_name);
 }
 
-void ShareTensorBufferOpHandle::SetShareDims(bool share_dims) {
-  functor_.SetShareDims(share_dims);
+void ShareTensorBufferOpHandle::SetShareDimsAndDtype(
+    bool share_dims_and_dtype) {
+  functor_.SetShareDimsAndDtype(share_dims_and_dtype);
 }
 
 void ShareTensorBufferOpHandle::InitCUDA() {
diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h
index dd2364fec4a..d3852a85d01 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h
+++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h
@@ -56,7 +56,7 @@ class ShareTensorBufferOpHandle : public OpHandleBase {
   void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info,
                        const std::string &out_var_name);
 
-  void SetShareDims(bool share_dims);
+  void SetShareDimsAndDtype(bool share_dims_and_dtype);
 
   const ShareTensorBufferFunctor &Functor() const { return functor_; }
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc
index bf7cd55fab2..1ca6e989f27 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc
@@ -283,7 +283,8 @@ void BufferSharedInplaceOpPass::ApplyImpl(ProgramDesc *main_program,
       op->SetInput("X", inputs);
       op->SetOutput("Out", outputs);
       op->SetOutput("XOut", inputs);  // add necessary dependency
-      op->SetAttr("share_dims", std::vector<bool>(inputs.size(), false));
+      op->SetAttr("share_dims_and_dtype",
+                  std::vector<bool>(inputs.size(), false));
     }
     block->Flush();
   }
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc
index d09de5be84c..0ed2ec51b89 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc
@@ -277,7 +277,7 @@ static void BuildInplaceAddToGraph(Node *in_var_0, Node *in_var_1,
   grad_add_op_desc->SetInput("X", {in_var_1->Name()});
   grad_add_op_desc->SetOutput("Out", {out_var->Name()});
   grad_add_op_desc->SetOutput("XOut", {in_var_1->Name()});
-  grad_add_op_desc->SetAttr("share_dims", std::vector<bool>(1, true));
+  grad_add_op_desc->SetAttr("share_dims_and_dtype", std::vector<bool>(1, true));
 
   // Add share_buffer op between in_var_0 and in_var_1
   OpDesc share_buffer_op;
@@ -285,7 +285,7 @@ static void BuildInplaceAddToGraph(Node *in_var_0, Node *in_var_1,
   share_buffer_op.SetInput("X", {in_var_0->Name()});
   share_buffer_op.SetOutput("Out", {in_var_1->Name()});
   share_buffer_op.SetOutput("XOut", {in_var_0->Name()});
-  share_buffer_op.SetAttr("share_dims", std::vector<bool>(1, false));
+  share_buffer_op.SetAttr("share_dims_and_dtype", std::vector<bool>(1, false));
 
   auto *new_share_buffer_op = graph->CreateOpNode(&share_buffer_op);
   new_share_buffer_op->inputs.push_back(in_var_0);
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc
index f6465d38584..9d1e2301704 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc
@@ -329,7 +329,7 @@ bool MemoryReusePass::IsVarPairReusable(
 void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op,
                                   details::VarHandle *in_var,
                                   details::VarHandle *out_var,
-                                  bool share_dims) const {
+                                  bool share_dims_and_dtype) const {
   PADDLE_ENFORCE_GT(
       (*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0,
       platform::errors::NotFound("Var(%s) does not in mem opt var infos.",
@@ -349,8 +349,8 @@ void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op,
     share_buffer_op->AddInput(in_var);
   }
 
-  if (share_dims) {
-    share_buffer_op->SetShareDims(true);
+  if (share_dims_and_dtype) {
+    share_buffer_op->SetShareDimsAndDtype(true);
   }
 
   share_buffer_op->AddReuseVarPair(
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 7f8d7bffa98..2efaa3f37f9 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -260,6 +260,8 @@ class Tensor {
     // should not be copied.
   }
 
+  void ShareDataTypeWith(const Tensor& tensor) { type_ = tensor.type_; }
+
   bool IsSharedBufferWith(const Tensor& src) const {
     return holder_ && holder_ == src.Holder();
   }
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index a8f35d61f3c..0c3572ab655 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -203,6 +203,7 @@ elseif(WITH_ROCM)
 else()
     cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
 endif()
+cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context share_buffer_op)
 
 cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})
 if (WITH_PYTHON)
diff --git a/paddle/fluid/operators/share_buffer_op.cc b/paddle/fluid/operators/share_buffer_op.cc
index a161b9272b7..f6a6c9695b2 100644
--- a/paddle/fluid/operators/share_buffer_op.cc
+++ b/paddle/fluid/operators/share_buffer_op.cc
@@ -49,7 +49,8 @@ class ShareBufferOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor), The output tensors which are the same as X. It is "
               "used to build the graph dependency")
         .AsDuplicable();
-    AddAttr<std::vector<bool>>("share_dims", "Whether to share dims")
+    AddAttr<std::vector<bool>>("share_dims_and_dtype",
+                               "Whether to share dims and data type")
         .SetDefault(std::vector<bool>());
     AddComment(
         R"DOC(Operator used to perform inplace memory reuse. It should be not exposed to Python APIs.)DOC");
diff --git a/paddle/fluid/operators/share_buffer_op.h b/paddle/fluid/operators/share_buffer_op.h
index 5138ad9d54b..1d0abf14f57 100644
--- a/paddle/fluid/operators/share_buffer_op.h
+++ b/paddle/fluid/operators/share_buffer_op.h
@@ -29,12 +29,13 @@ class ShareBufferOpKernel : public framework::OpKernel<T> {
     size_t n = inputs.size();
     PADDLE_ENFORCE_EQ(n, outputs.size(), platform::errors::PermissionDenied(
                                              "Variable number not match."));
-    const auto &share_dims = ctx.Attr<std::vector<bool>>("share_dims");
-    if (!share_dims.empty()) {
-      PADDLE_ENFORCE_EQ(
-          n, share_dims.size(),
-          platform::errors::PermissionDenied(
-              "Attribute share_dims number not match input variable number."));
+    const auto &share_dims_and_dtype =
+        ctx.Attr<std::vector<bool>>("share_dims_and_dtype");
+    if (!share_dims_and_dtype.empty()) {
+      PADDLE_ENFORCE_EQ(n, share_dims_and_dtype.size(),
+                        platform::errors::PermissionDenied(
+                            "Attribute share_dims_and_dtype number not match "
+                            "input variable number."));
     }
 
     const std::vector<std::string> *input_args = nullptr,
@@ -50,8 +51,9 @@ class ShareBufferOpKernel : public framework::OpKernel<T> {
       outputs[i]->ShareBufferWith(*inputs[i]);
       VLOG(10) << "Share tensor buffer " << (*input_args)[i] << " -> "
                << (*output_args)[i];
-      if (!share_dims.empty() && share_dims[i]) {
+      if (!share_dims_and_dtype.empty() && share_dims_and_dtype[i]) {
         outputs[i]->Resize(inputs[i]->dims());
+        outputs[i]->ShareDataTypeWith(*inputs[i]);
       }
     }
   }
diff --git a/paddle/fluid/operators/share_buffer_op_test.cc b/paddle/fluid/operators/share_buffer_op_test.cc
new file mode 100644
index 00000000000..60220981cab
--- /dev/null
+++ b/paddle/fluid/operators/share_buffer_op_test.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/place.h"
+
+USE_OP(share_buffer);
+
+namespace paddle {
+namespace framework {
+
+TEST(test_share_buffer_op, test_share_buffer_op) {
+  std::vector<std::string> inputs = {"X1", "X2"};
+  std::vector<std::string> outputs = {"Y1", "Y2"};
+  std::vector<DDim> dims = {{2, 3, 4}, {5, 6}};
+  std::vector<bool> share_dims_and_dtype = {false, true};
+
+  size_t n = inputs.size();
+  EXPECT_EQ(n, outputs.size());
+  EXPECT_EQ(n, dims.size());
+  EXPECT_EQ(n, share_dims_and_dtype.size());
+
+  OpDesc desc;
+  desc.SetType("share_buffer");
+  desc.SetInput("X", inputs);
+  desc.SetOutput("Out", outputs);
+  desc.SetOutput("XOut", inputs);
+  desc.SetAttr("share_dims_and_dtype", share_dims_and_dtype);
+
+  auto op = OpRegistry::CreateOp(desc);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  platform::Place place = platform::CUDAPlace(0);
+#else
+  platform::Place place = platform::CPUPlace();
+#endif
+
+  Scope scope;
+  for (size_t i = 0; i < n; ++i) {
+    auto *in_tensor = scope.Var(inputs[i])->GetMutable<LoDTensor>();
+    in_tensor->Resize(dims[i]);
+    in_tensor->mutable_data<float>(place);
+    scope.Var(outputs[i])->GetMutable<LoDTensor>();
+  }
+  op->Run(scope, place);
+  platform::DeviceContextPool::Instance().Get(place)->Wait();
+
+  for (size_t i = 0; i < n; ++i) {
+    const auto &in_tensor = scope.Var(inputs[i])->Get<LoDTensor>();
+    const auto &out_tensor = scope.Var(outputs[i])->Get<LoDTensor>();
+    EXPECT_TRUE(out_tensor.IsSharedBufferWith(in_tensor));
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
index 422cb58ff9a..4552d600baf 100644
--- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
+++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
@@ -123,7 +123,7 @@ class TestIRPassBase(unittest.TestCase):
             if op.type != "share_buffer":
                 continue
 
-            share_dims = op.attr("share_dims")
+            share_dims = op.attr("share_dims_and_dtype")
             if share_dims:
                 for i in range(len(share_dims)):
                     self.assertEqual(share_dims[0], share_dims[i])
-- 
GitLab
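
Note on the fix (editor's sketch, not part of the patch): ShareBufferWith only
aliases the underlying allocation; the output tensor keeps its own dims and
dtype. Under inplace addto the producer op may be skipped at runtime, so a
consumer reads the output tensor's own metadata. Copying only the dims (the old
share_dims behavior) left a default or stale dtype, so the shared buffer could
be reinterpreted with the wrong element size. The self-contained C++ model
below illustrates these semantics; Tensor, DataType, and SizeOf here are
simplified stand-ins invented for illustration, not Paddle's real classes.

#include <cassert>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

enum class DataType { kFloat32, kFloat64 };

// Bytes per element for each (illustrative) dtype.
std::size_t SizeOf(DataType t) { return t == DataType::kFloat32 ? 4 : 8; }

class Tensor {
 public:
  void Resize(const std::vector<int64_t>& dims) { dims_ = dims; }
  const std::vector<int64_t>& dims() const { return dims_; }

  int64_t numel() const {
    return std::accumulate(dims_.begin(), dims_.end(), int64_t{1},
                           std::multiplies<int64_t>());
  }

  // Allocate (or grow) the holder for the current dims and dtype.
  void* mutable_data(DataType type) {
    type_ = type;
    std::size_t bytes = static_cast<std::size_t>(numel()) * SizeOf(type);
    if (!holder_ || holder_->size() < bytes) {
      holder_ = std::make_shared<std::vector<char>>(bytes);
    }
    return holder_->data();
  }

  // Alias only the allocation; dims and dtype are untouched.
  void ShareBufferWith(const Tensor& src) { holder_ = src.holder_; }

  // The one-line fix mirrored from the diff: also propagate the dtype.
  void ShareDataTypeWith(const Tensor& src) { type_ = src.type_; }

  bool IsSharedBufferWith(const Tensor& src) const {
    return holder_ && holder_ == src.holder_;
  }

  DataType type() const { return type_; }

 private:
  std::shared_ptr<std::vector<char>> holder_;
  std::vector<int64_t> dims_;
  DataType type_{DataType::kFloat32};
};

int main() {
  Tensor in, out;
  in.Resize({2, 3});
  in.mutable_data(DataType::kFloat64);  // producer writes fp64 data

  // The share_dims_and_dtype = true path of the kernel:
  out.ShareBufferWith(in);
  out.Resize(in.dims());      // the old share_dims behavior stopped here
  out.ShareDataTypeWith(in);  // the fix: copy the dtype as well

  assert(out.IsSharedBufferWith(in));
  // Without ShareDataTypeWith, out.type() would still be the default fp32,
  // and a consumer of the skipped op would misread the 48-byte buffer as
  // twelve 4-byte elements instead of six 8-byte ones.
  assert(out.type() == in.type());
  return 0;
}

In the real pass the consumer is the sum/grad-accumulation kernel, which
dispatches on the output tensor's dtype, hence the rename from share_dims to
share_dims_and_dtype throughout the patch.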