From 661dbdbee5c1112a88d22fd0a76d4de06d2ca705 Mon Sep 17 00:00:00 2001
From: Huihuang Zheng
Date: Wed, 1 Dec 2021 09:43:25 +0800
Subject: [PATCH] Modify ShareTensorWithCinnBuffer by callback to save memory
 (#37493)

Modify ShareTensorWithCinnBuffer by callback to save memory
---
 .../framework/paddle2cinn/cinn_compiler.cc    |  1 +
 paddle/fluid/operators/cinn_launch_op.cc      | 82 +++++++++++--------
 paddle/fluid/operators/cinn_launch_op.h       | 28 +++----
 paddle/fluid/operators/cinn_launch_op_test.cc | 54 ++++--------
 4 files changed, 78 insertions(+), 87 deletions(-)

diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
index 3f1b6c78d84..360c9270782 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
@@ -209,6 +209,7 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
       std::make_unique<GraphCompiler>(target, scope, cinn_graph);
   GraphCompiler::CompileOptions options;
   options.with_instantiate_variables = false;
+  options.with_buffer_handle_instruction_inserted = true;
   auto compiled_res =
       graph_compiler->Build(options, std::move(fetch_ids), stream);
   auto compiled_obj = std::make_unique<CinnCompiledObject>();
diff --git a/paddle/fluid/operators/cinn_launch_op.cc b/paddle/fluid/operators/cinn_launch_op.cc
index e70a51d8805..f0ad5b3c3bf 100644
--- a/paddle/fluid/operators/cinn_launch_op.cc
+++ b/paddle/fluid/operators/cinn_launch_op.cc
@@ -13,7 +13,10 @@
 // limitations under the License.

 #include "paddle/fluid/operators/cinn_launch_op.h"
+
+#include <functional>
 #include <vector>
+
 #include "paddle/fluid/string/string_helper.h"

 DECLARE_bool(cudnn_deterministic);
@@ -108,33 +111,9 @@ std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
   return all_parameters;
 }

-void CinnLaunchContext::MutableTensorData(const std::string& var_name,
-                                          const platform::Place& place,
-                                          LoDTensor* paddle_tensor,
-                                          bool is_internal_var) {
-  auto cinn_name = var_name;
-  if (!is_internal_var) {
-    PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
-                      platform::errors::InvalidArgument(
-                          "Paddle variable(%s) not used by cinn", var_name));
-    cinn_name = paddle2cinn_varmap_.at(var_name);
-  }
-
-  auto cinn_tensor = GetCinnTensor(cinn_name);
-  // TODO(CtfGo): support mutable corresponding c++ type after CINN ready
-  VLOG(4) << "Only support float in cinn_launch op now.";
-  paddle_tensor->mutable_data<float>(
-      framework::make_ddim(cinn_tensor->shape().data()), place);
-}
-
 void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
                                               const LoDTensor& paddle_tensor,
                                               const CinnTensor& cinn_tensor) {
-  PADDLE_ENFORCE_EQ(
-      paddle_tensor.IsInitialized(), true,
-      platform::errors::InvalidArgument(
-          "Tensor in variable(%s) is not initialized.", paddle_name));
-
   // check dimension
   auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
   PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
@@ -147,27 +126,39 @@ void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
 }

 void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name,
+                                               const platform::Place& place,
                                                LoDTensor* paddle_tensor) {
   PADDLE_ENFORCE_EQ(IsVariableUsed(paddle_name), true,
                     platform::errors::InvalidArgument(
                         "Paddle variable(%s) not used by cinn", paddle_name));

   const auto& cinn_name = paddle2cinn_varmap_.at(paddle_name);
-  CheckTensorEquivalent(paddle_name, *paddle_tensor, GetCinnTensor(cinn_name));
-  return SetArgument(cinn_name, paddle_tensor);
+  CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
+  if (!paddle_tensor->IsInitialized()) {
+    paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
+  }
+  CheckTensorEquivalent(paddle_name, *paddle_tensor, cinn_tensor);
+  return SetArgument(cinn_name, place, /* free_mem_callback = */ false,
+                     paddle_tensor);
 }

 void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name,
+                                               const platform::Place& place,
                                                LoDTensor* paddle_tensor) {
   PADDLE_ENFORCE_GT(cinn_variable_names_.count(cinn_name), 0,
                     platform::errors::InvalidArgument(
                         "Variable(%s) not found in cinn socpe.", cinn_name));
-  CheckTensorEquivalent(cinn_name, *paddle_tensor, GetCinnTensor(cinn_name));
-  return SetArgument(cinn_name, paddle_tensor);
+  CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
+  if (!paddle_tensor->IsInitialized()) {
+    paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
+  }
+  CheckTensorEquivalent(cinn_name, *paddle_tensor, cinn_tensor);
+  return SetArgument(cinn_name, place, /* free_mem_callback = */ true,
+                     paddle_tensor);
 }

 std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
-    LoDTensor* tensor) {
+    const platform::Place& place, bool free_mem_callback, LoDTensor* tensor) {
   // convert paddle dimensions array to cinn format
   std::vector<cinn_dimension_t> cinn_dims(tensor->dims().size());
   for (auto i = 0; i < tensor->dims().size(); ++i) {
@@ -177,19 +168,42 @@ std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
   auto cinn_buffer = std::make_unique<cinn_buffer_t>();
   // assign size and memory
   cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
-  cinn_buffer->memory = reinterpret_cast<uint8_t*>(tensor->data<float>());
+
+  cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
+      [place, tensor](void* ctx, cinn_buffer_t* buffer) {
+        buffer->memory =
+            reinterpret_cast<uint8_t*>(tensor->mutable_data<float>(place));
+        return 0;
+      });
+
+  if (free_mem_callback) {
+    cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
+        [tensor](void* ctx, cinn_buffer_t* buffer) {
+          tensor->clear();
+          return 0;
+        });
+    return cinn_buffer;
+  }
+
+  cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
+      [](void* ctx, cinn_buffer_t* buffer) {
+        // Do nothing
+        return 0;
+      });
   return cinn_buffer;
 }

 void CinnLaunchContext::SetArgument(const std::string& cinn_name,
+                                    const platform::Place& place,
+                                    bool free_mem_callback,
                                     LoDTensor* paddle_tensor) {
-  auto buffer = ShareTensorWithCinnBuffer(paddle_tensor);
+  auto buffer =
+      ShareTensorWithCinnBuffer(place, free_mem_callback, paddle_tensor);
   name2argument_.emplace(cinn_name, buffer.get());
   hold_buffers_.emplace_back(std::move(buffer));
   VLOG(4) << "SetArgument-" << name2argument_.size() << ": "
-          << "name(" << cinn_name << "), "
-          << "type(" << framework::DataTypeToString(paddle_tensor->type())
-          << "), dims(" << paddle_tensor->dims() << ").";
+          << "name(" << cinn_name << "), dims(" << paddle_tensor->dims()
+          << ").";
 }

 const std::map<std::string, cinn_pod_value_t>&
diff --git a/paddle/fluid/operators/cinn_launch_op.h b/paddle/fluid/operators/cinn_launch_op.h
index 53e6ff0d613..2b1bf89197d 100644
--- a/paddle/fluid/operators/cinn_launch_op.h
+++ b/paddle/fluid/operators/cinn_launch_op.h
@@ -49,16 +49,13 @@ class CinnLaunchContext {
   // Return whether a Paddle variable used on compiled kernels
   bool IsVariableUsed(const std::string& var_name);

-  // Allocate buffer to a Paddle tensor with assginment information from CINN
-  void MutableTensorData(const std::string& var_name,
-                         const platform::Place& place, LoDTensor* paddle_tensor,
-                         bool is_internal_var = false);
-
   // Assign tensor buffer to input or output variables
-  void AssignExternalVariable(const std::string& var_name, LoDTensor* tensor);
+  void AssignExternalVariable(const std::string& var_name,
+                              const platform::Place& place, LoDTensor* tensor);

   // Assign tensor buffer to internal variables
-  void AssignInternalVariable(const std::string& var_name, LoDTensor* tensor);
+  void AssignInternalVariable(const std::string& var_name,
+                              const platform::Place& place, LoDTensor* tensor);

   // Extract internal variable names from CinnScope
   //  by excluding used input and output variables
@@ -83,10 +80,12 @@ class CinnLaunchContext {

   // Share the buffer of a Paddle tensor to CINN by delivering memory address
   // to a cinn_buffer_t object
-  std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(LoDTensor* tensor);
+  std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(
+      const platform::Place& place, bool free_mem_callback, LoDTensor* tensor);

   // Set an argument with (cinn name)->(paddle tensor) pair
-  void SetArgument(const std::string& cinn_name, LoDTensor* paddle_tensor);
+  void SetArgument(const std::string& cinn_name, const platform::Place& place,
+                   bool free_mem_callback, LoDTensor* paddle_tensor);

  private:
   // a variable name map from paddle to cinn
@@ -198,7 +197,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
       }

       launch_context->AssignExternalVariable(
-          var_name, scope.GetVar(var_name)->GetMutable<LoDTensor>());
+          var_name, place, scope.GetVar(var_name)->GetMutable<LoDTensor>());
     }

     // 3.2 Prepare output variables: all output variables should
@@ -215,11 +214,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
                             "Output variable(%s) not used by cinn", var_name));

       auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
-      if (!tensor->IsInitialized()) {
-        launch_context->MutableTensorData(var_name, place, tensor);
-      }
-      launch_context->AssignExternalVariable(
-          var_name, scope.GetVar(var_name)->GetMutable<LoDTensor>());
+      launch_context->AssignExternalVariable(var_name, place, tensor);
     }

     // 3.3 Prepare internal or temporary variables: Create a temporary
@@ -232,8 +227,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
     framework::Scope* temp_scope = scope.NewTmpScope().release();
     for (const auto& var_name : internal_variable_names) {
       auto* tensor = temp_scope->Var(var_name)->GetMutable<LoDTensor>();
-      launch_context->MutableTensorData(var_name, place, tensor, true);
-      launch_context->AssignInternalVariable(var_name, tensor);
+      launch_context->AssignInternalVariable(var_name, place, tensor);
     }

     // Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic.
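
Reviewer note: the heart of this change is that a cinn_buffer_t no longer receives the address of pre-allocated tensor memory when the argument map is built; instead it carries external_malloc/external_free callbacks, so a tensor is allocated only when CINN first touches the buffer, and an internal (temporary) tensor is released as soon as CINN frees it. Below is a minimal, self-contained sketch of that lifecycle; FakeTensor and FakeBuffer are simplified stand-ins for LoDTensor and cinn_buffer_t, not the real Paddle/CINN types.

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Simplified stand-ins for LoDTensor and cinn_buffer_t (illustration only).
struct FakeTensor {
  std::vector<float> data;
  void Allocate(std::size_t n) { data.resize(n); }   // plays mutable_data<float>()
  void Clear() { std::vector<float>().swap(data); }  // plays tensor->clear()
  std::uint8_t* Raw() { return reinterpret_cast<std::uint8_t*>(data.data()); }
};

struct FakeBuffer {
  std::uint8_t* memory = nullptr;
  // Same shape as the callbacks installed by ShareTensorWithCinnBuffer.
  std::function<int(void*, FakeBuffer*)>* external_malloc = nullptr;
  std::function<int(void*, FakeBuffer*)>* external_free = nullptr;
};

int main() {
  FakeTensor tensor;
  FakeBuffer buffer;

  // Allocation is deferred: memory is bound only when the callback runs.
  buffer.external_malloc = new std::function<int(void*, FakeBuffer*)>(
      [&tensor](void*, FakeBuffer* buf) {
        tensor.Allocate(12);  // lazily allocate 3 x 4 floats
        buf->memory = tensor.Raw();
        return 0;
      });
  // For an internal variable the free callback drops the memory right away.
  buffer.external_free = new std::function<int(void*, FakeBuffer*)>(
      [&tensor](void*, FakeBuffer* buf) {
        tensor.Clear();
        buf->memory = nullptr;
        return 0;
      });

  // What the runtime would do around a kernel call:
  (*buffer.external_malloc)(nullptr, &buffer);
  std::cout << (buffer.memory != nullptr) << "\n";  // prints 1: memory is bound
  (*buffer.external_free)(nullptr, &buffer);
  std::cout << (buffer.memory != nullptr) << "\n";  // prints 0: memory released

  delete buffer.external_malloc;
  delete buffer.external_free;
  return 0;
}

Only internal variables get a free callback that actually releases memory (free_mem_callback = true); inputs and outputs keep their storage because the Paddle scope still owns it.
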
diff --git a/paddle/fluid/operators/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn_launch_op_test.cc
index 5a07a49a596..5e0b87d06af 100644
--- a/paddle/fluid/operators/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn_launch_op_test.cc
@@ -222,30 +222,9 @@ TEST(CinnLaunchContextTest, TestGetInternalVariableNames) {
   auto launch_context =
       std::make_unique<CinnLaunchContext>(GetDefaultCompiledObj());
   auto internal_variable_names = launch_context->GetInternalVariableNames();
-  ASSERT_EQ(internal_variable_names.size(), 1);
-  EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2");
-}
-
-TEST(CinnLaunchContextTest, TestMutableTensorData) {
-  platform::CPUPlace place;
-  framework::Scope scope;
-  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
-  auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
-
-  auto launch_context =
-      std::make_unique<CinnLaunchContext>(GetDefaultCompiledObj());
-  // mutable_data on external variable
-  ASSERT_NO_THROW(launch_context->MutableTensorData("var1", place, tensor1));
-  ASSERT_TRUE(tensor1->IsInitialized());
-  ASSERT_EQ(tensor1->dims(), framework::make_ddim({3, 4}));
-  ASSERT_THROW(launch_context->MutableTensorData("not_exist", place, tensor1),
-               paddle::platform::EnforceNotMet);
-
-  // mutable_data on internal variable
-  ASSERT_NO_THROW(
-      launch_context->MutableTensorData("cinn_var2", place, tensor2, true));
-  ASSERT_TRUE(tensor2->IsInitialized());
-  ASSERT_EQ(tensor2->dims(), framework::make_ddim({6, 7, 8}));
+  ASSERT_EQ(internal_variable_names.size(), 3);
+  EXPECT_NE(internal_variable_names.find("cinn_var2"),
+            internal_variable_names.end());
 }

 TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
@@ -255,12 +234,9 @@ TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
   framework::Scope scope;
   auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();

-  // CheckTensorEquivalent: tensor is not initialized
-  ASSERT_THROW(launch_context->AssignExternalVariable("var1", tensor1),
-               paddle::platform::EnforceNotMet);
   // CheckTensorEquivalent: tensor dimension not equivalent
   tensor1->mutable_data<float>(framework::make_ddim({3, 5}), place);
-  ASSERT_THROW(launch_context->AssignExternalVariable("var1", tensor1),
+  ASSERT_THROW(launch_context->AssignExternalVariable("var1", place, tensor1),
                paddle::platform::EnforceNotMet);
 }

@@ -272,11 +248,12 @@ TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) {
   auto* tensor4 = scope.Var("var4")->GetMutable<LoDTensor>();

   // not used
-  ASSERT_THROW(launch_context->AssignExternalVariable("var4", tensor4),
+  ASSERT_THROW(launch_context->AssignExternalVariable("var4", place, tensor4),
                paddle::platform::EnforceNotMet);
   // not found
-  ASSERT_THROW(launch_context->AssignExternalVariable("cinn_var4", tensor4),
-               paddle::platform::EnforceNotMet);
+  ASSERT_THROW(
+      launch_context->AssignExternalVariable("cinn_var4", place, tensor4),
+      paddle::platform::EnforceNotMet);
 }

 TEST(CinnLaunchContextTest, TestSetArgument) {
@@ -286,22 +263,25 @@ TEST(CinnLaunchContextTest, TestSetArgument) {
   platform::CPUPlace place;
   framework::Scope scope;
   auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
-  tensor1->mutable_data<float>(framework::make_ddim({3, 4}), place);
-  auto* data1 = tensor1->data<float>();
+  float* data1 =
+      tensor1->mutable_data<float>(framework::make_ddim({3, 4}), place);
   data1[0] = 9.99f;
   data1[10] = 19.99f;
   // assign external variable
-  ASSERT_NO_THROW(launch_context->AssignExternalVariable("var1", tensor1));
+  ASSERT_NO_THROW(
+      launch_context->AssignExternalVariable("var1", place, tensor1));
   auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
   tensor2->mutable_data<float>(framework::make_ddim({6, 7, 8}), place);
-  ASSERT_NO_THROW(launch_context->AssignInternalVariable("cinn_var2", tensor2));
+  ASSERT_NO_THROW(
+      launch_context->AssignInternalVariable("cinn_var2", place, tensor2));
   // FinalizeArguments not missed check
   ASSERT_THROW(launch_context->FinalizeArguments(),
                paddle::platform::EnforceNotMet);
   auto* tensor3 = scope.Var("var3")->GetMutable<LoDTensor>();
   tensor3->mutable_data<float>(framework::make_ddim({10, 16}), place);
-  ASSERT_NO_THROW(launch_context->AssignExternalVariable("var3", tensor3));
+  ASSERT_NO_THROW(
+      launch_context->AssignExternalVariable("var3", place, tensor3));
   auto name2argument = launch_context->FinalizeArguments();
   ASSERT_EQ(name2argument.size(), 3);
   ASSERT_GT(name2argument.count("cinn_var1"), 0);
   auto* cinn_buffer =
       static_cast<cinn_buffer_t*>(name2argument.at("cinn_var1"));
+  ASSERT_EQ(cinn_buffer->memory, nullptr);
+  cinn_buffer->external_malloc->operator()(nullptr, cinn_buffer);
   ASSERT_NE(cinn_buffer->memory, nullptr);
   ASSERT_EQ(cinn_buffer->num_elements(), 12);
   auto* shadow_data = reinterpret_cast<float*>(cinn_buffer->memory);
-- 
GitLab
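
A note on the one-line cinn_compiler.cc change and the new test assertions: as I understand it, with_buffer_handle_instruction_inserted asks CINN's GraphCompiler to surround each compiled instruction with buffer-handling steps, and those steps are what end up invoking the external_malloc/external_free callbacks installed above. That is why the test now expects cinn_buffer->memory to remain null after FinalizeArguments and become non-null only once external_malloc has been called. The sketch below only illustrates that calling pattern; ToyBuffer and RunWithBufferHandles are invented names for illustration, not CINN's actual runtime API.

#include <functional>
#include <vector>

// Hypothetical stand-ins; these are NOT CINN's real instruction classes.
struct ToyBuffer {
  unsigned char* memory = nullptr;
  std::function<int(void*, ToyBuffer*)>* external_malloc = nullptr;
  std::function<int(void*, ToyBuffer*)>* external_free = nullptr;
};

// Rough shape of a buffer-handling step around one compiled instruction:
// bind every argument buffer first, run the kernel, then let each buffer's
// free callback decide whether its memory can be dropped.
void RunWithBufferHandles(const std::vector<ToyBuffer*>& args,
                          const std::function<void()>& run_kernel) {
  for (ToyBuffer* buf : args) {
    if (buf->external_malloc != nullptr) (*buf->external_malloc)(nullptr, buf);
  }
  run_kernel();
  for (ToyBuffer* buf : args) {
    if (buf->external_free != nullptr) (*buf->external_free)(nullptr, buf);
  }
}

Under this scheme, an internal variable's memory lives only across the instructions that actually need it, which is the memory saving the commit title refers to.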