From bf38175e59b5eac69aac56c2177956bf519338b8 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 15 Feb 2023 17:05:40 +0800 Subject: [PATCH] make cinn_launch_op run interpretercore in tracing mode to reduce number of threads (#50472) * make cinn_launch_op run interpretercore in tracing mode to reduce number of threads * skip getWorkqueue in tracing mode --- .../new_executor/interpreter/execution_config.h | 3 ++- .../framework/new_executor/interpretercore.cc | 16 +++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpreter/execution_config.h b/paddle/fluid/framework/new_executor/interpreter/execution_config.h index 6934723146..4ca33857e8 100644 --- a/paddle/fluid/framework/new_executor/interpreter/execution_config.h +++ b/paddle/fluid/framework/new_executor/interpreter/execution_config.h @@ -25,9 +25,10 @@ namespace interpreter { struct ExecutionConfig { bool used_for_jit{false}; - bool create_local_scope{true}; + bool used_for_cinn{false}; bool used_for_control_flow_op{false}; + bool create_local_scope{true}; size_t host_num_threads; size_t deivce_num_threads; diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 2f9fadf730..1f373f7456 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -126,6 +126,7 @@ InterpreterCore::InterpreterCore(const platform::Place& place, completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion); execution_config_.used_for_jit = used_for_jit; + execution_config_.used_for_cinn = used_for_cinn; execution_config_.used_for_control_flow_op = used_for_control_flow_op; execution_config_.create_local_scope = !used_for_jit && FLAGS_new_executor_use_local_scope && @@ -199,20 +200,21 @@ interpreter::CostInfo InterpreterCore::DryRun( } void InterpreterCore::RunImpl() { - // For the program that only run once, it is no need to - // create work_queue, so the async_work_queue_ is created - // until the second step run. - async_work_queue_ = GetWorkQueue(); - // lazy initialization of gc, do not create gc is the program only run once if (!gc_) { gc_ = CreateInterpreterCoreGarbageCollector(place_, vec_instruction_); } - if (execution_config_.used_for_jit && (sync_op_num_ == 0)) { + if ((execution_config_.used_for_jit || execution_config_.used_for_cinn) && + (sync_op_num_ == 0)) { VLOG(4) << "Tracing Instruction List"; TraceInstructionList(vec_instruction_); } else { + VLOG(4) << "Non-tracing"; + // For the program that only run once, it is no need to + // create work_queue, so the async_work_queue_ is created + // until the second step run. + async_work_queue_ = GetWorkQueue(); ExecuteInstructionList(vec_instruction_); } #ifdef PADDLE_WITH_ASCEND_CL @@ -409,7 +411,7 @@ std::shared_ptr InterpreterCore::GetWorkQueue() { async_work_queue_ = std::make_shared( execution_config_.host_num_threads, execution_config_.deivce_num_threads, - &main_thread_blocker_); + nullptr); } return async_work_queue_; } -- GitLab