diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index d24f38a5ee70f02fc42f538d1dbc603aaeb1d3b9..dd1c0d885efdda3e3779a75c5409b7b80fc7aa18 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -61,8 +61,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
       stream_analyzer_(place) {
   VLOG(4) << "InterpreterCore(): " << this << " on " << place_;
   is_build_ = false;
-  async_work_queue_.reset(new interpreter::AsyncWorkQueue(
-      kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_));
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (IsInterpretercoreFastGCEnabled()) {
@@ -125,6 +123,17 @@ paddle::framework::FetchList InterpreterCore::Run(
     // add listener before run and is_build=true
     global_scope_->ResetListener();
 
+    // For a program that runs only once there is no need to create a
+    // work queue, so async_work_queue_ is created lazily here rather
+    // than in the constructor (i.e. not until the second step runs).
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps / var refs for the first run.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
   }
 
@@ -172,6 +181,17 @@ paddle::framework::FetchList InterpreterCore::Run(
     // add listener before run and is_build=true
     global_scope_->ResetListener();
 
+    // For a program that runs only once there is no need to create a
+    // work queue, so async_work_queue_ is created lazily here rather
+    // than in the constructor (i.e. not until the second step runs).
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps / var refs for the first run.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
   }
 
@@ -341,10 +361,6 @@ void InterpreterCore::Convert(
   if (FLAGS_new_executor_use_inplace && !inplaced) {
     BuildInplace();
   }
-
-  // prepare for the first time.
-  async_work_queue_->PrepareAtomicDeps(dependecy_count_);
-  async_work_queue_->PrepareAtomicVarRef(vec_meta_info);
 }
 
 bool InterpreterCore::BuildInplaceCheckVarIsOnlyInput(size_t var_index) {
@@ -924,6 +940,18 @@ interpreter::CostInfo InterpreterCore::DryRun(
   interpreter::CostInfo cost_info;
   {
     interpreter::ProfilerGuard(place_, &cost_info);
+
+    // For a program that runs only once there is no need to create a
+    // work queue, so async_work_queue_ is created lazily here rather
+    // than in the constructor (i.e. not until the second step runs).
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps / var refs for the first run.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
     platform::DeviceContextPool::Instance().Get(place_)->Wait();
   }