From b0d580a217f293a31b5f09dbce6eca9e126c512f Mon Sep 17 00:00:00 2001 From: sneaxiy <32832641+sneaxiy@users.noreply.github.com> Date: Wed, 1 Dec 2021 21:25:22 +0800 Subject: [PATCH] Fix inplace addto pass by setting dtype correctly (#37717) * fix inplace addto pass * update * fix ut * improve ci coverage * fix musl ci compile error --- .../details/share_tensor_buffer_functor.cc | 10 +-- .../details/share_tensor_buffer_functor.h | 8 ++- .../details/share_tensor_buffer_op_handle.cc | 9 +-- .../details/share_tensor_buffer_op_handle.h | 2 +- .../buffer_shared_inplace_op_pass.cc | 3 +- .../inplace_addto_op_pass.cc | 4 +- .../memory_optimize_pass/memory_reuse_pass.cc | 6 +- paddle/fluid/framework/tensor.h | 2 + paddle/fluid/operators/CMakeLists.txt | 1 + paddle/fluid/operators/share_buffer_op.cc | 3 +- paddle/fluid/operators/share_buffer_op.h | 16 +++-- .../fluid/operators/share_buffer_op_test.cc | 71 +++++++++++++++++++ .../unittests/test_apply_pass_to_program.py | 2 +- 13 files changed, 110 insertions(+), 27 deletions(-) create mode 100644 paddle/fluid/operators/share_buffer_op_test.cc diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index ccc64a9cdc..1225e2ee02 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -39,14 +39,14 @@ ShareTensorBufferFunctor::ShareTensorBufferFunctor( Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, const std::vector &out_var_names, const bool &is_variant_scope, - bool share_dims) + bool share_dims_and_dtype) : scope_(scope), scope_idx_(scope_idx), op_type_(op_type), in_var_infos_(in_var_infos), out_var_names_(out_var_names), is_variant_scope_(is_variant_scope), - share_dims_(share_dims) { + share_dims_and_dtype_(share_dims_and_dtype) { PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size(), platform::errors::PreconditionNotMet( "The number of input variables and output variables " @@ -147,12 +147,14 @@ void ShareTensorBufferFunctor::operator()(Scope *exec_scope) { // NOTE(zhiqiu): In the case of inplace addto, if the operator of // the in_out_vars is skipped during running, we should set the dims of // output as the same as input. - if (share_dims_) { + if (share_dims_and_dtype_) { out_tensor->Resize(in_tensor.dims()); + out_tensor->ShareDataTypeWith(in_tensor); } VLOG(2) << "Share tensor buffer when running " << op_type_ << " : " - << in_var_info->Name() << " -> " << out_var_names_[i]; + << in_var_info->Name() << " -> " << out_var_names_[i] + << " share_dims_and_dtype = " << share_dims_and_dtype_; } } } diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h index 528b047bcc..f0ddb3f013 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h @@ -73,12 +73,14 @@ class ShareTensorBufferFunctor { Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, const std::vector &out_var_names, - const bool &is_variant_scope, bool share_dims = false); + const bool &is_variant_scope, bool share_dims_and_dtype = false); void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); - void SetShareDims(bool share_dims) { share_dims_ = share_dims; } + void SetShareDimsAndDtype(bool share_dims_and_dtype) { + share_dims_and_dtype_ = share_dims_and_dtype; + } void operator()(Scope *exec_scope); @@ -108,7 +110,7 @@ class ShareTensorBufferFunctor { // NOTE(zhiqiu): In the case of inplace addto, if the operator of // the in_out_vars is skipped during running, we should set the dims of output // as the same as input. - bool share_dims_{false}; + bool share_dims_and_dtype_{false}; }; } // namespace details diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index 7e10c669ac..aa942415fb 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -64,10 +64,10 @@ ComputationOpHandle *GetUniquePendingComputationOpHandle( ShareTensorBufferOpHandle::ShareTensorBufferOpHandle( ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names, bool share_dims) + const std::vector &out_var_names, bool share_dims_and_dtype) : OpHandleBase(node), functor_(scope, scope_idx, op_type, in_var_infos, out_var_names, - is_variant_scope_, share_dims) {} + is_variant_scope_, share_dims_and_dtype) {} std::unordered_map ShareTensorBufferOpHandle::ReusedVars() const { @@ -79,8 +79,9 @@ void ShareTensorBufferOpHandle::AddReuseVarPair( functor_.AddReuseVarPair(in_var_info, out_var_name); } -void ShareTensorBufferOpHandle::SetShareDims(bool share_dims) { - functor_.SetShareDims(share_dims); +void ShareTensorBufferOpHandle::SetShareDimsAndDtype( + bool share_dims_and_dtype) { + functor_.SetShareDimsAndDtype(share_dims_and_dtype); } void ShareTensorBufferOpHandle::InitCUDA() { diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h index dd2364fec4..d3852a85d0 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h @@ -56,7 +56,7 @@ class ShareTensorBufferOpHandle : public OpHandleBase { void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); - void SetShareDims(bool share_dims); + void SetShareDimsAndDtype(bool share_dims_and_dtype); const ShareTensorBufferFunctor &Functor() const { return functor_; } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index bf7cd55fab..1ca6e989f2 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -283,7 +283,8 @@ void BufferSharedInplaceOpPass::ApplyImpl(ProgramDesc *main_program, op->SetInput("X", inputs); op->SetOutput("Out", outputs); op->SetOutput("XOut", inputs); // add necessary dependency - op->SetAttr("share_dims", std::vector(inputs.size(), false)); + op->SetAttr("share_dims_and_dtype", + std::vector(inputs.size(), false)); } block->Flush(); } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc index d09de5be84..0ed2ec51b8 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc @@ -277,7 +277,7 @@ static void BuildInplaceAddToGraph(Node *in_var_0, Node *in_var_1, grad_add_op_desc->SetInput("X", {in_var_1->Name()}); grad_add_op_desc->SetOutput("Out", {out_var->Name()}); grad_add_op_desc->SetOutput("XOut", {in_var_1->Name()}); - grad_add_op_desc->SetAttr("share_dims", std::vector(1, true)); + grad_add_op_desc->SetAttr("share_dims_and_dtype", std::vector(1, true)); // Add share_buffer op between in_var_0 and in_var_1 OpDesc share_buffer_op; @@ -285,7 +285,7 @@ static void BuildInplaceAddToGraph(Node *in_var_0, Node *in_var_1, share_buffer_op.SetInput("X", {in_var_0->Name()}); share_buffer_op.SetOutput("Out", {in_var_1->Name()}); share_buffer_op.SetOutput("XOut", {in_var_0->Name()}); - share_buffer_op.SetAttr("share_dims", std::vector(1, false)); + share_buffer_op.SetAttr("share_dims_and_dtype", std::vector(1, false)); auto *new_share_buffer_op = graph->CreateOpNode(&share_buffer_op); new_share_buffer_op->inputs.push_back(in_var_0); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc index f6465d3858..9d1e230170 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc @@ -329,7 +329,7 @@ bool MemoryReusePass::IsVarPairReusable( void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, details::VarHandle *out_var, - bool share_dims) const { + bool share_dims_and_dtype) const { PADDLE_ENFORCE_GT( (*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0, platform::errors::NotFound("Var(%s) does not in mem opt var infos.", @@ -349,8 +349,8 @@ void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, share_buffer_op->AddInput(in_var); } - if (share_dims) { - share_buffer_op->SetShareDims(true); + if (share_dims_and_dtype) { + share_buffer_op->SetShareDimsAndDtype(true); } share_buffer_op->AddReuseVarPair( diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 7f8d7bffa9..2efaa3f37f 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -260,6 +260,8 @@ class Tensor { // should not be copied. } + void ShareDataTypeWith(const Tensor& tensor) { type_ = tensor.type_; } + bool IsSharedBufferWith(const Tensor& src) const { return holder_ && holder_ == src.Holder(); } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index a8f35d61f3..0c3572ab65 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -203,6 +203,7 @@ elseif(WITH_ROCM) else() cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3) endif() +cc_test(share_buffer_op_cpp_test SRCS share_buffer_op_test.cc DEPS lod_tensor device_context share_buffer_op) cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS}) if (WITH_PYTHON) diff --git a/paddle/fluid/operators/share_buffer_op.cc b/paddle/fluid/operators/share_buffer_op.cc index a161b9272b..f6a6c9695b 100644 --- a/paddle/fluid/operators/share_buffer_op.cc +++ b/paddle/fluid/operators/share_buffer_op.cc @@ -49,7 +49,8 @@ class ShareBufferOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor), The output tensors which are the same as X. It is " "used to build the graph dependency") .AsDuplicable(); - AddAttr>("share_dims", "Whether to share dims") + AddAttr>("share_dims_and_dtype", + "Whether to share dims and data type") .SetDefault(std::vector()); AddComment( R"DOC(Operator used to perform inplace memory reuse. It should be not exposed to Python APIs.)DOC"); diff --git a/paddle/fluid/operators/share_buffer_op.h b/paddle/fluid/operators/share_buffer_op.h index 5138ad9d54..1d0abf14f5 100644 --- a/paddle/fluid/operators/share_buffer_op.h +++ b/paddle/fluid/operators/share_buffer_op.h @@ -29,12 +29,13 @@ class ShareBufferOpKernel : public framework::OpKernel { size_t n = inputs.size(); PADDLE_ENFORCE_EQ(n, outputs.size(), platform::errors::PermissionDenied( "Variable number not match.")); - const auto &share_dims = ctx.Attr>("share_dims"); - if (!share_dims.empty()) { - PADDLE_ENFORCE_EQ( - n, share_dims.size(), - platform::errors::PermissionDenied( - "Attribute share_dims number not match input variable number.")); + const auto &share_dims_and_dtype = + ctx.Attr>("share_dims_and_dtype"); + if (!share_dims_and_dtype.empty()) { + PADDLE_ENFORCE_EQ(n, share_dims_and_dtype.size(), + platform::errors::PermissionDenied( + "Attribute share_dims_and_dtype number not match " + "input variable number.")); } const std::vector *input_args = nullptr, @@ -50,8 +51,9 @@ class ShareBufferOpKernel : public framework::OpKernel { outputs[i]->ShareBufferWith(*inputs[i]); VLOG(10) << "Share tensor buffer " << (*input_args)[i] << " -> " << (*output_args)[i]; - if (!share_dims.empty() && share_dims[i]) { + if (!share_dims_and_dtype.empty() && share_dims_and_dtype[i]) { outputs[i]->Resize(inputs[i]->dims()); + outputs[i]->ShareDataTypeWith(*inputs[i]); } } } diff --git a/paddle/fluid/operators/share_buffer_op_test.cc b/paddle/fluid/operators/share_buffer_op_test.cc new file mode 100644 index 0000000000..60220981ca --- /dev/null +++ b/paddle/fluid/operators/share_buffer_op_test.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/place.h" + +USE_OP(share_buffer); + +namespace paddle { +namespace framework { + +TEST(test_share_buffer_op, test_share_buffer_op) { + std::vector inputs = {"X1", "X2"}; + std::vector outputs = {"Y1", "Y2"}; + std::vector dims = {{2, 3, 4}, {5, 6}}; + std::vector share_dims_and_dtype = {false, true}; + + size_t n = inputs.size(); + EXPECT_EQ(n, outputs.size()); + EXPECT_EQ(n, dims.size()); + EXPECT_EQ(n, share_dims_and_dtype.size()); + + OpDesc desc; + desc.SetType("share_buffer"); + desc.SetInput("X", inputs); + desc.SetOutput("Out", outputs); + desc.SetOutput("XOut", inputs); + desc.SetAttr("share_dims_and_dtype", share_dims_and_dtype); + + auto op = OpRegistry::CreateOp(desc); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + platform::Place place = platform::CUDAPlace(0); +#else + platform::Place place = platform::CPUPlace(); +#endif + + Scope scope; + for (size_t i = 0; i < n; ++i) { + auto *in_tensor = scope.Var(inputs[i])->GetMutable(); + in_tensor->Resize(dims[i]); + in_tensor->mutable_data(place); + scope.Var(outputs[i])->GetMutable(); + } + op->Run(scope, place); + platform::DeviceContextPool::Instance().Get(place)->Wait(); + + for (size_t i = 0; i < n; ++i) { + const auto &in_tensor = scope.Var(inputs[i])->Get(); + const auto &out_tensor = scope.Var(outputs[i])->Get(); + EXPECT_TRUE(out_tensor.IsSharedBufferWith(in_tensor)); + } +} + +} // namespace framework +} // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py index 422cb58ff9..4552d600ba 100644 --- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py +++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py @@ -123,7 +123,7 @@ class TestIRPassBase(unittest.TestCase): if op.type != "share_buffer": continue - share_dims = op.attr("share_dims") + share_dims = op.attr("share_dims_and_dtype") if share_dims: for i in range(len(share_dims)): self.assertEqual(share_dims[0], share_dims[i]) -- GitLab