From 238f82e6be15f0a6723471af91a39c4fa64af292 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 16 Jun 2022 11:08:53 +0800 Subject: [PATCH] [new-exec] lazy creating work queue (#43551) * lazy creating work queue * fix dry_run --- .../framework/new_executor/interpretercore.cc | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index fe0c7fe0721..11d672e8ef0 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -63,8 +63,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place, stream_analyzer_(place) { VLOG(4) << "InterpreterCore(): " << this << " on " << place_; is_build_ = false; - async_work_queue_.reset(new interpreter::AsyncWorkQueue( - kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_)); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (IsInterpretercoreFastGCEnabled()) { @@ -127,6 +125,17 @@ paddle::framework::FetchList InterpreterCore::Run( // add listener before run and is_build=true global_scope_->ResetListener(); + // For a program that runs only once, there is no need to + // create work_queue, so async_work_queue_ is not created + // until the second step runs. + if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. + async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); } @@ -174,6 +183,17 @@ paddle::framework::FetchList InterpreterCore::Run( // add listener before run and is_build=true global_scope_->ResetListener(); + // For a program that runs only once, there is no need to + // create work_queue, so async_work_queue_ is not created + // until the second step runs. 
+ if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. + async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); } @@ -343,10 +363,6 @@ void InterpreterCore::Convert( if (FLAGS_new_executor_use_inplace && !inplaced) { BuildInplace(); } - - // prepare for the first time. - async_work_queue_->PrepareAtomicDeps(dependecy_count_); - async_work_queue_->PrepareAtomicVarRef(vec_meta_info); } bool InterpreterCore::BuildInplaceCheckVarIsOnlyInput(size_t var_index) { @@ -940,6 +956,18 @@ interpreter::CostInfo InterpreterCore::DryRun( interpreter::CostInfo cost_info; { interpreter::ProfilerGuard(place_, &cost_info); + + // For a program that runs only once, there is no need to + // create work_queue, so async_work_queue_ is not created + // until the second step runs. + if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. + async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); platform::DeviceContextPool::Instance().Get(place_)->Wait(); } -- GitLab