From 0c44dd64078dc9e0b8c7f3ef7a139c8f9ace3379 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Sat, 25 Jun 2022 09:08:05 +0800 Subject: [PATCH] [new-exec] lazy creating work queue (#43551) (#43768) * lazy creating work queue * fix dry_run --- .../framework/new_executor/interpretercore.cc | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index d24f38a5ee7..dd1c0d885ef 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -61,8 +61,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place, stream_analyzer_(place) { VLOG(4) << "InterpreterCore(): " << this << " on " << place_; is_build_ = false; - async_work_queue_.reset(new interpreter::AsyncWorkQueue( - kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_)); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (IsInterpretercoreFastGCEnabled()) { @@ -125,6 +123,17 @@ paddle::framework::FetchList InterpreterCore::Run( // add listener before run and is_build=true global_scope_->ResetListener(); + // For a program that only runs once, there is no need to + // create the work queue, so async_work_queue_ is not created + // until the second run. + if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. 
+ async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); } @@ -172,6 +181,17 @@ paddle::framework::FetchList InterpreterCore::Run( // add listener before run and is_build=true global_scope_->ResetListener(); + // For a program that only runs once, there is no need to + // create the work queue, so async_work_queue_ is not created + // until the second run. + if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. + async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); } @@ -341,10 +361,6 @@ void InterpreterCore::Convert( if (FLAGS_new_executor_use_inplace && !inplaced) { BuildInplace(); } - - // prepare for the first time. - async_work_queue_->PrepareAtomicDeps(dependecy_count_); - async_work_queue_->PrepareAtomicVarRef(vec_meta_info); } bool InterpreterCore::BuildInplaceCheckVarIsOnlyInput(size_t var_index) { @@ -924,6 +940,18 @@ interpreter::CostInfo InterpreterCore::DryRun( interpreter::CostInfo cost_info; { interpreter::ProfilerGuard(place_, &cost_info); + + // For a program that only runs once, there is no need to + // create the work queue, so async_work_queue_ is not created + // until the second run. + if (async_work_queue_ == nullptr) { + async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_); + // prepare for the first time. + async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + } + ExecuteInstructionList(vec_instruction_); platform::DeviceContextPool::Instance().Get(place_)->Wait(); } -- GitLab