diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index fe0c7fe0721780401b903b05a8b0916f62360531..11d672e8ef07643a89c50bac92ddb4212250f3cf 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -63,8 +63,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
       stream_analyzer_(place) {
   VLOG(4) << "InterpreterCore(): " << this << " on " << place_;
   is_build_ = false;
-  async_work_queue_.reset(new interpreter::AsyncWorkQueue(
-      kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_));
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (IsInterpretercoreFastGCEnabled()) {
@@ -127,6 +125,17 @@ paddle::framework::FetchList InterpreterCore::Run(
     // add listener before run and is_build=true
     global_scope_->ResetListener();
 
+    // A program that runs only once does not need a work queue, so
+    // async_work_queue_ is not created in the constructor; it is
+    // created lazily here, the first time it is needed.
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps and var refs once, right after creation.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
   }
 
@@ -174,6 +183,17 @@ paddle::framework::FetchList InterpreterCore::Run(
     // add listener before run and is_build=true
     global_scope_->ResetListener();
 
+    // A program that runs only once does not need a work queue, so
+    // async_work_queue_ is not created in the constructor; it is
+    // created lazily here, the first time it is needed.
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps and var refs once, right after creation.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
   }
 
@@ -343,10 +363,6 @@ void InterpreterCore::Convert(
   if (FLAGS_new_executor_use_inplace && !inplaced) {
     BuildInplace();
   }
-
-  // prepare for the first time.
-  async_work_queue_->PrepareAtomicDeps(dependecy_count_);
-  async_work_queue_->PrepareAtomicVarRef(vec_meta_info);
 }
 
 bool InterpreterCore::BuildInplaceCheckVarIsOnlyInput(size_t var_index) {
@@ -940,6 +956,18 @@ interpreter::CostInfo InterpreterCore::DryRun(
   interpreter::CostInfo cost_info;
   {
     interpreter::ProfilerGuard(place_, &cost_info);
+
+    // A program that runs only once does not need a work queue, so
+    // async_work_queue_ is not created in the constructor; it is
+    // created lazily here, the first time it is needed.
+    if (async_work_queue_ == nullptr) {
+      async_work_queue_ = std::make_unique<interpreter::AsyncWorkQueue>(
+          kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_);
+      // Prepare the atomic deps and var refs once, right after creation.
+      async_work_queue_->PrepareAtomicDeps(dependecy_count_);
+      async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
+    }
+
     ExecuteInstructionList(vec_instruction_);
     platform::DeviceContextPool::Instance().Get(place_)->Wait();
   }