未验证 提交 bf38175e 编写于 作者: L Leo Chen 提交者: GitHub

make cinn_launch_op run interpretercore in tracing mode to reduce number of threads (#50472)

* make cinn_launch_op run interpretercore in tracing mode to reduce number of threads

* skip getWorkqueue in tracing mode
上级 84beef80
......@@ -25,9 +25,10 @@ namespace interpreter {
struct ExecutionConfig {
bool used_for_jit{false};
bool create_local_scope{true};
bool used_for_cinn{false};
bool used_for_control_flow_op{false};
bool create_local_scope{true};
size_t host_num_threads;
size_t deivce_num_threads;
......
......@@ -126,6 +126,7 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion);
execution_config_.used_for_jit = used_for_jit;
execution_config_.used_for_cinn = used_for_cinn;
execution_config_.used_for_control_flow_op = used_for_control_flow_op;
execution_config_.create_local_scope =
!used_for_jit && FLAGS_new_executor_use_local_scope &&
......@@ -199,20 +200,21 @@ interpreter::CostInfo InterpreterCore::DryRun(
}
void InterpreterCore::RunImpl() {
// For a program that runs only once there is no need to
// create a work queue, so async_work_queue_ is not created
// until the second step runs.
async_work_queue_ = GetWorkQueue();
// lazy initialization of gc, do not create gc if the program only runs once
if (!gc_) {
gc_ = CreateInterpreterCoreGarbageCollector(place_, vec_instruction_);
}
if (execution_config_.used_for_jit && (sync_op_num_ == 0)) {
if ((execution_config_.used_for_jit || execution_config_.used_for_cinn) &&
(sync_op_num_ == 0)) {
VLOG(4) << "Tracing Instruction List";
TraceInstructionList(vec_instruction_);
} else {
VLOG(4) << "Non-tracing";
// For a program that runs only once there is no need to
// create a work queue, so async_work_queue_ is not created
// until the second step runs.
async_work_queue_ = GetWorkQueue();
ExecuteInstructionList(vec_instruction_);
}
#ifdef PADDLE_WITH_ASCEND_CL
......@@ -409,7 +411,7 @@ std::shared_ptr<interpreter::AsyncWorkQueue> InterpreterCore::GetWorkQueue() {
async_work_queue_ = std::make_shared<interpreter::AsyncWorkQueue>(
execution_config_.host_num_threads,
execution_config_.deivce_num_threads,
&main_thread_blocker_);
nullptr);
}
return async_work_queue_;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册