From 151c5d748476f16b49b4436e447c8adc0e018c2e Mon Sep 17 00:00:00 2001 From: CtfGo Date: Thu, 9 Dec 2021 21:59:54 +0800 Subject: [PATCH] cache scope and place on CinnLaunchContext and pass them to callback (#37983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cinn_launch_op: cache scope and place on CinnLaunchContext to skip duplicate alloc/free callback construction --- .../framework/paddle2cinn/CMakeLists.txt | 2 +- .../framework/paddle2cinn/cinn_compiler.cc | 4 + .../framework/paddle2cinn/cinn_compiler.h | 8 ++ .../operators/cinn/cinn_launch_context.cc | 127 ++++++++++-------- .../operators/cinn/cinn_launch_context.h | 37 ++--- .../cinn/cinn_launch_context_test.cc | 71 +++++----- .../fluid/operators/cinn/cinn_launch_op.cu.cc | 20 --- paddle/fluid/operators/cinn/cinn_launch_op.h | 109 ++++++--------- .../operators/cinn/cinn_launch_op_test.cc | 5 +- 9 files changed, 191 insertions(+), 192 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index b13166cff60..8d2ee2f0100 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(cinn_cache_key SRCS cinn_cache_key.cc DEPS boost graph graph_helper l cc_library(build_cinn_pass SRCS build_cinn_pass.cc DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors enforce) cc_library(transform_desc SRCS transform_desc.cc DEPS proto_desc cinn) cc_library(cinn_graph_symbolization SRCS cinn_graph_symbolization.cc DEPS lod_tensor graph transform_desc cinn) -cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn) +cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn cinn_launch_context) if (WITH_TESTING) cc_test(cinn_lib_test SRCS cinn_lib_test.cc DEPS cinn) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 7fc8eff3d31..131a6a09e8f 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -41,6 +41,7 @@ #include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/analysis/dot.h" +#include "paddle/fluid/operators/cinn/cinn_launch_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/string_helper.h" @@ -217,6 +218,9 @@ std::unique_ptr CinnCompiler::CompileGraph( *compiled_obj = {std::move(graph_compiler), std::move(compiled_res.runtime_program), scope, symbol.var_model_to_program_map()}; + compiled_obj->launch_context = + std::make_unique( + compiled_obj->paddle2cinn_varmap, compiled_obj->scope); return compiled_obj; } diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h index 71119acf1fb..d75279cfe96 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h @@ -31,6 +31,13 @@ #include "paddle/fluid/platform/macros.h" namespace paddle { + +namespace operators { +namespace details { +class CinnLaunchContext; +} +} + namespace framework { namespace paddle2cinn { @@ -39,6 +46,7 @@ struct CinnCompiledObject { std::unique_ptr<::cinn::hlir::framework::Program> runtime_program; std::shared_ptr<::cinn::hlir::framework::Scope> scope; std::unordered_map paddle2cinn_varmap; + std::unique_ptr launch_context; }; // Entrance to use CINN. diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 90a4ca73399..f6337a754f9 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -32,7 +32,30 @@ CinnLaunchContext::CinnLaunchContext( [](const auto& name_view) { return std::string(name_view.data()); }); } -bool CinnLaunchContext::IsVariableUsed(const std::string& paddle_name) { +void CinnLaunchContext::UpdateCapturedEnv(const framework::Scope& scope, + const platform::Place& place) { + if (std::addressof(scope) == cached_scope_ && + std::addressof(place) == cached_place_) { + VLOG(4) << "Captured scope:" << cached_scope_ << ", place:" << cached_place_ + << " are not changed"; + return; + } + cached_scope_ = std::addressof(scope); + cached_place_ = std::addressof(place); + cached_temp_scope_ = scope.NewTmpScope(); + VLOG(4) << "Captured env is update, scope:" << cached_scope_ << "->" + << std::addressof(scope) << ", place:" << cached_place_ << "->" + << std::addressof(place); +} + +bool CinnLaunchContext::IsArgumentsInitialized() const { + if (hold_buffers_.empty() || name2argument_.empty()) { + return false; + } + return true; +} + +bool CinnLaunchContext::IsVariableUsed(const std::string& paddle_name) const { return paddle2cinn_varmap_.count(paddle_name) > 0 && cinn_variable_names_.count(paddle2cinn_varmap_.at(paddle_name)) > 0; } @@ -67,85 +90,83 @@ void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name, // TODO(CtfGo): check the underlying data type after CINN ready } -void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name, - const platform::Place& place, - LoDTensor* paddle_tensor) { +void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name) { PADDLE_ENFORCE_EQ(IsVariableUsed(paddle_name), true, platform::errors::InvalidArgument( "Paddle variable(%s) not used by cinn", paddle_name)); const auto& cinn_name = paddle2cinn_varmap_.at(paddle_name); + const auto& paddle_tensor = + cached_scope_->GetVar(paddle_name)->Get(); CinnTensor cinn_tensor = GetCinnTensor(cinn_name); - if (!paddle_tensor->IsInitialized()) { - paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data())); + if (paddle_tensor.IsInitialized()) { + CheckTensorEquivalent(paddle_name, paddle_tensor, cinn_tensor); } - CheckTensorEquivalent(paddle_name, *paddle_tensor, cinn_tensor); - return SetArgument(cinn_name, place, /* free_mem_callback = */ false, - paddle_tensor); + + auto cinn_buffer = std::make_unique(); + // assign dimensions and alloc/free callback of cinn_buffer_t + cinn_buffer->resize(cinn_tensor->shape().data().data(), + cinn_tensor->shape().data().size()); + cinn_buffer->external_malloc = new std::function( + [this, paddle_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = + cached_scope_->GetVar(paddle_name)->GetMutable(); + tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); + buffer->memory = reinterpret_cast( + tensor->mutable_data(*cached_place_)); + return 0; + }); + + // external variables will be recycled by global gc, so do nothing here + cinn_buffer->external_free = new std::function( + [](void* ctx, cinn_buffer_t* buffer) { + // Do nothing + return 0; + }); + + return SetArgument(cinn_name, std::move(cinn_buffer)); } -void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name, - const platform::Place& place, - LoDTensor* paddle_tensor) { +void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name) { PADDLE_ENFORCE_GT(cinn_variable_names_.count(cinn_name), 0, platform::errors::InvalidArgument( "Variable(%s) not found in cinn socpe.", cinn_name)); CinnTensor cinn_tensor = GetCinnTensor(cinn_name); - if (!paddle_tensor->IsInitialized()) { - paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data())); - } - CheckTensorEquivalent(cinn_name, *paddle_tensor, cinn_tensor); - return SetArgument(cinn_name, place, /* free_mem_callback = */ true, - paddle_tensor); -} - -std::unique_ptr CinnLaunchContext::ShareTensorWithCinnBuffer( - const platform::Place& place, bool free_mem_callback, LoDTensor* tensor) { - // convert paddle dimensions array to cinn format - std::vector cinn_dims(tensor->dims().size()); - for (auto i = 0; i < tensor->dims().size(); ++i) { - cinn_dims[i] = static_cast(tensor->dims().at(i)); - } - auto cinn_buffer = std::make_unique(); - // assign size and memory - cinn_buffer->resize(cinn_dims.data(), cinn_dims.size()); + // assign dimensions and alloc/free callback of cinn_buffer_t + cinn_buffer->resize(cinn_tensor->shape().data().data(), + cinn_tensor->shape().data().size()); cinn_buffer->external_malloc = new std::function( - [place, tensor](void* ctx, cinn_buffer_t* buffer) { - buffer->memory = - reinterpret_cast(tensor->mutable_data(place)); + [this, cinn_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = + cached_temp_scope_->Var(cinn_name)->GetMutable(); + tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); + buffer->memory = reinterpret_cast( + tensor->mutable_data(*cached_place_)); return 0; }); - if (free_mem_callback) { - cinn_buffer->external_free = new std::function( - [tensor](void* ctx, cinn_buffer_t* buffer) { - tensor->clear(); - return 0; - }); - return cinn_buffer; - } - + // internal variables should release its buffer immediately + // if no instruction use it cinn_buffer->external_free = new std::function( - [](void* ctx, cinn_buffer_t* buffer) { - // Do nothing + [this, cinn_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = + cached_temp_scope_->GetVar(cinn_name)->GetMutable(); + tensor->clear(); return 0; }); - return cinn_buffer; + return SetArgument(cinn_name, std::move(cinn_buffer)); } void CinnLaunchContext::SetArgument(const std::string& cinn_name, - const platform::Place& place, - bool free_mem_callback, - LoDTensor* paddle_tensor) { - auto buffer = - ShareTensorWithCinnBuffer(place, free_mem_callback, paddle_tensor); + std::unique_ptr&& buffer) { + VLOG(4) << "SetArgument-" << name2argument_.size() << ": name(" << cinn_name + << "), dims(" << framework::DDim(buffer->dims, buffer->dimensions) + << ")."; + name2argument_.emplace(cinn_name, buffer.get()); hold_buffers_.emplace_back(std::move(buffer)); - VLOG(4) << "SetArgument-" << name2argument_.size() << ": " - << "name(" << cinn_name << "), dims(" << paddle_tensor->dims() - << ")."; } const std::map& diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index c990255d682..7bf70c9b9d0 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -24,7 +24,7 @@ #include "cinn/runtime/cinn_runtime.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/framework/scope.h" namespace paddle { namespace operators { @@ -40,16 +40,22 @@ class CinnLaunchContext { const std::unordered_map& paddle2cinn_varmap, const std::shared_ptr& cinn_scope); + // explicitly update several environment variables captured + // by callback of execution arguments + void UpdateCapturedEnv(const framework::Scope& scope, + const platform::Place& place); + + // Return whether execution arguments has been initialized + bool IsArgumentsInitialized() const; + // Return whether a Paddle variable used on compiled kernels - bool IsVariableUsed(const std::string& var_name); + bool IsVariableUsed(const std::string& paddle_name) const; // Assign tensor buffer to input or output variables - void AssignExternalVariable(const std::string& var_name, - const platform::Place& place, LoDTensor* tensor); + void AssignExternalVariable(const std::string& paddle_name); // Assign tensor buffer to internal variables - void AssignInternalVariable(const std::string& var_name, - const platform::Place& place, LoDTensor* tensor); + void AssignInternalVariable(const std::string& cinn_name); // Extract internal variable names from CinnScope // by excluding used input and output variables @@ -58,10 +64,6 @@ class CinnLaunchContext { // Finalize all execution arguments and return them const std::map& FinalizeArguments() const; - std::vector> HandoverBuffers() { - return std::move(hold_buffers_); - } - private: // Get CinnTensor with CINN variable name CinnTensor GetCinnTensor(const std::string& var_name); @@ -72,16 +74,15 @@ class CinnLaunchContext { const LoDTensor& paddle_tensor, const CinnTensor& cinn_tensor); - // Share the buffer of a Paddle tensor to CINN by delivering memory address - // to a cinn_buffer_t object - std::unique_ptr ShareTensorWithCinnBuffer( - const platform::Place& place, bool free_mem_callback, LoDTensor* tensor); - - // Set an argument with (cinn name)->(paddle tensor) pair - void SetArgument(const std::string& cinn_name, const platform::Place& place, - bool free_mem_callback, LoDTensor* paddle_tensor); + // Set an argument with (cinn name)->(cinn_buffer_t) pair + void SetArgument(const std::string& cinn_name, + std::unique_ptr&& buffer); private: + const framework::Scope* cached_scope_ = nullptr; + const platform::Place* cached_place_ = nullptr; + std::unique_ptr cached_temp_scope_ = nullptr; + // a variable name map from paddle to cinn const std::unordered_map& paddle2cinn_varmap_; // the variable scope of cinn diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc index d922e8355b4..da7640c3c0f 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc @@ -45,81 +45,86 @@ std::unique_ptr CreateDefaultLaunchContext() { return std::make_unique(paddle2cinn_varmap, cinn_scope); } -TEST(CinnLaunchContextTest, TestIsVariableUsed) { +TEST(CinnLaunchContextTest, TestBasic) { auto launch_context = CreateDefaultLaunchContext(); - + // test IsVariableUsed ASSERT_EQ(launch_context->IsVariableUsed("var1"), true); ASSERT_EQ(launch_context->IsVariableUsed("var4"), false); -} - -TEST(CinnLaunchContextTest, TestGetInternalVariableNames) { - auto launch_context = CreateDefaultLaunchContext(); - auto internal_variable_names = launch_context->GetInternalVariableNames(); - ASSERT_EQ(internal_variable_names.size(), 3); - EXPECT_NE(internal_variable_names.find("cinn_var2"), - internal_variable_names.end()); + // test UpdateCapturedEnv + platform::CPUPlace place; + framework::Scope scope; + ASSERT_NO_THROW(launch_context->UpdateCapturedEnv(scope, place)); + // test IsArgumentsInitialized + ASSERT_FALSE(launch_context->IsArgumentsInitialized()); } TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) { - auto launch_context = CreateDefaultLaunchContext(); platform::CPUPlace place; framework::Scope scope; + auto launch_context = CreateDefaultLaunchContext(); + launch_context->UpdateCapturedEnv(scope, place); auto* tensor1 = scope.Var("var1")->GetMutable(); // CheckTensorEquivalent: tensor dimension not equivalent tensor1->mutable_data(framework::make_ddim({3, 5}), place); - ASSERT_THROW(launch_context->AssignExternalVariable("var1", place, tensor1), + ASSERT_THROW(launch_context->AssignExternalVariable("var1"), paddle::platform::EnforceNotMet); } TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) { - auto launch_context = CreateDefaultLaunchContext(); platform::CPUPlace place; framework::Scope scope; + auto launch_context = CreateDefaultLaunchContext(); + launch_context->UpdateCapturedEnv(scope, place); auto* tensor4 = scope.Var("var4")->GetMutable(); // not used - ASSERT_THROW(launch_context->AssignExternalVariable("var4", place, tensor4), + ASSERT_THROW(launch_context->AssignExternalVariable("var4"), paddle::platform::EnforceNotMet); // not found - ASSERT_THROW( - launch_context->AssignExternalVariable("cinn_var4", place, tensor4), - paddle::platform::EnforceNotMet); + ASSERT_THROW(launch_context->AssignInternalVariable("cinn_var4"), + paddle::platform::EnforceNotMet); } TEST(CinnLaunchContextTest, TestSetArgument) { + platform::CPUPlace cpu_place; + platform::Place place(cpu_place); + framework::Scope scope; auto launch_context = CreateDefaultLaunchContext(); + launch_context->UpdateCapturedEnv(scope, place); - platform::CPUPlace place; - framework::Scope scope; + // assign external variables auto* tensor1 = scope.Var("var1")->GetMutable(); float* data1 = tensor1->mutable_data(framework::make_ddim({3, 4}), place); data1[0] = 9.99f; data1[10] = 19.99f; + ASSERT_NO_THROW(launch_context->AssignExternalVariable("var1")); - // assign external variable - ASSERT_NO_THROW( - launch_context->AssignExternalVariable("var1", place, tensor1)); - auto* tensor2 = scope.Var("var2")->GetMutable(); - tensor2->mutable_data(framework::make_ddim({6, 7, 8}), place); - ASSERT_NO_THROW( - launch_context->AssignInternalVariable("cinn_var2", place, tensor2)); - // FinalizeArguments not missed check - ASSERT_THROW(launch_context->FinalizeArguments(), - paddle::platform::EnforceNotMet); auto* tensor3 = scope.Var("var3")->GetMutable(); tensor3->mutable_data(framework::make_ddim({10, 16}), place); - ASSERT_NO_THROW( - launch_context->AssignExternalVariable("var3", place, tensor3)); + ASSERT_NO_THROW(launch_context->AssignExternalVariable("var3")); + + // FinalizeArguments missed check + ASSERT_THROW(launch_context->FinalizeArguments(), + paddle::platform::EnforceNotMet); + // test get internal variables + auto internal_variable_names = launch_context->GetInternalVariableNames(); + ASSERT_EQ(internal_variable_names.size(), 1); + EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2"); + auto* tensor2 = scope.Var("var2")->GetMutable(); + tensor2->mutable_data(framework::make_ddim({6, 7, 8}), place); + ASSERT_NO_THROW(launch_context->AssignInternalVariable("cinn_var2")); + + // check argument is set correctly and alloc/free callbacks work well auto name2argument = launch_context->FinalizeArguments(); ASSERT_EQ(name2argument.size(), 3); ASSERT_EQ(name2argument.count("cinn_var1"), 1); - // check ShareTensorWithCinnBuffer + ASSERT_TRUE(launch_context->IsArgumentsInitialized()); + auto* cinn_buffer = static_cast(name2argument.at("cinn_var1")); - ASSERT_EQ(cinn_buffer->memory, nullptr); cinn_buffer->external_malloc->operator()(nullptr, cinn_buffer); ASSERT_NE(cinn_buffer->memory, nullptr); diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc index 813e7b1152f..ea36a19202e 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc @@ -31,26 +31,6 @@ namespace operators { namespace details { #ifdef PADDLE_WITH_CUDA -void CUDART_CB ReleaseScope(void* data) { - auto* temp_scope = static_cast(data); - delete temp_scope; -} - -void CUDART_CB ReleaseBuffers(void* data) { - auto* buffers = - static_cast>*>(data); - delete buffers; -} - -template <> -void ReleaseResource( - const std::vector& resources, void* stream) { - PADDLE_ENFORCE_GPU_SUCCESS(cudaLaunchHostFunc( - static_cast(stream), ReleaseScope, resources[0])); - PADDLE_ENFORCE_GPU_SUCCESS(cudaLaunchHostFunc( - static_cast(stream), ReleaseBuffers, resources[1])); -} - template <> void* GetStream( const framework::ExecutionContext& ctx) { diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index 3a272916332..8a5ca166941 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -56,25 +56,12 @@ void LaunchCinnExecution(const CinnCompiledObject& compiled_obj, // Set cinn FLAGS (such as FLAGS_cinn_cudnn_deterministic) with paddle's FLAGS. void SetCinnRuntimeFlags(); -template -void ReleaseResource(const std::vector& resources, void* stream) { - auto* temp_scope = static_cast(resources[0]); - auto* buffers = - static_cast>*>(resources[1]); - delete temp_scope; - delete buffers; -} - template void* GetStream(const framework::ExecutionContext& ctx) { return nullptr; } #ifdef PADDLE_WITH_CUDA -template <> -void ReleaseResource( - const std::vector& resources, void* stream); - template <> void* GetStream( const framework::ExecutionContext& ctx); @@ -116,56 +103,54 @@ class CinnLaunchOpKernel : public framework::OpKernel { compilation_key, inputs_name2tensor, target, stream); details::DebugCinnCompiledResult(cinn_compiled_object); - auto launch_context = std::make_unique( - cinn_compiled_object.paddle2cinn_varmap, cinn_compiled_object.scope); - + const auto& launch_context = cinn_compiled_object.launch_context; // Step 3. Prepare arguments needed for the compiled executable program. - VLOG(4) << "CinnLaunchOp prepare arguments"; - - // 3.1 Prepare input variables: tensors of input variables have - // been initialized before graph compiled, just check the - // equiality between tensors of paddle and cinn. - for (const auto& var_name : input_variable_names) { - if (!launch_context->IsVariableUsed(var_name)) { - // some input variables don't need for cinn because they are - // eliminated by optimized passes or some cinn operators use - // less variables - VLOG(4) << "Input variable(" << var_name << ") not used by cinn"; - continue; + launch_context->UpdateCapturedEnv(scope, place); + if (!launch_context->IsArgumentsInitialized()) { + VLOG(4) << "CinnLaunchOp prepare arguments"; + + // 3.1 Prepare input variables: tensors of input variables have + // been initialized before graph compiled, just check the + // equiality between tensors of paddle and cinn. + for (const auto& var_name : input_variable_names) { + if (!launch_context->IsVariableUsed(var_name)) { + // some input variables don't need for cinn because they are + // eliminated by optimized passes or some cinn operators use + // less variables + VLOG(4) << "Input variable(" << var_name << ") not used by cinn"; + continue; + } + + launch_context->AssignExternalVariable(var_name); } - launch_context->AssignExternalVariable( - var_name, place, scope.GetVar(var_name)->GetMutable()); - } - - // 3.2 Prepare output variables: all output variables should - // be initialized and allocated buffer before - // the runtime program start execution, the compilation result - // includes details of their buffer assginment and we use that to - // allocate space in Paddle. For those variables allocated yet, - // like persistable parameters, just check the equiality between - // Paddle allocation and CINN buffer assginment. - auto output_variable_names = ctx.OutputNames(kOutputs); - for (const auto var_name : output_variable_names) { - PADDLE_ENFORCE_EQ(launch_context->IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( - "Output variable(%s) not used by cinn", var_name)); - - auto* tensor = scope.GetVar(var_name)->GetMutable(); - launch_context->AssignExternalVariable(var_name, place, tensor); - } + // 3.2 Prepare output variables: all output variables should + // be initialized and allocated buffer before + // the runtime program start execution, the compilation result + // includes details of their buffer assginment and we use that to + // allocate space in Paddle. For those variables allocated yet, + // like persistable parameters, just check the equiality between + // Paddle allocation and CINN buffer assginment. + auto output_variable_names = ctx.OutputNames(kOutputs); + for (const auto var_name : output_variable_names) { + PADDLE_ENFORCE_EQ( + launch_context->IsVariableUsed(var_name), true, + platform::errors::InvalidArgument( + "Output variable(%s) not used by cinn", var_name)); + + launch_context->AssignExternalVariable(var_name); + } - // 3.3 Prepare internal or temporary variables: Create a temporary - // scope to keep internal variables within graph or temporary - // variables needed by the compiled runtime program in addition. - // Here we directly use the names from CinnScope as Paddle variable - // names, because they will not be used outside the graph - // and should be destructed after computation finished. - auto internal_variable_names = launch_context->GetInternalVariableNames(); - framework::Scope* temp_scope = scope.NewTmpScope().release(); - for (const auto& var_name : internal_variable_names) { - auto* tensor = temp_scope->Var(var_name)->GetMutable(); - launch_context->AssignInternalVariable(var_name, place, tensor); + // 3.3 Prepare internal or temporary variables: Create a temporary + // scope to keep internal variables within graph or temporary + // variables needed by the compiled runtime program in addition. + // Here we directly use the names from CinnScope as Paddle variable + // names, because they will not be used outside the graph + // and should be destructed after computation finished. + auto internal_variable_names = launch_context->GetInternalVariableNames(); + for (const auto& var_name : internal_variable_names) { + launch_context->AssignInternalVariable(var_name); + } } // Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic. @@ -175,12 +160,6 @@ class CinnLaunchOpKernel : public framework::OpKernel { VLOG(4) << "Run Cinn compiled executable program with stream: " << stream; details::LaunchCinnExecution(cinn_compiled_object, *launch_context, stream); VLOG(4) << "CinnLaunchOp launch execution done."; - - // Step 6. Release some resources, such as `temp_scope` and cinn_buffers. - auto* buffers_holder = new std::vector>{ - launch_context->HandoverBuffers()}; - details::ReleaseResource({temp_scope, buffers_holder}, - stream); } }; diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index 02373c38184..e10fdf522ff 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -130,8 +130,9 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) { scope.Var(test_out_name)->GetMutable(); scope.Var(expected_out_name)->GetMutable(); - cinn_launch_op->Run(scope, place); - elementwise_add_op->Run(scope, place); + platform::Place run_place(place); + cinn_launch_op->Run(scope, run_place); + elementwise_add_op->Run(scope, run_place); LoDTensor test_out, expected_out; TensorCopySync(scope.Var(test_out_name)->Get(), -- GitLab