From bf38175e59b5eac69aac56c2177956bf519338b8 Mon Sep 17 00:00:00 2001
From: Leo Chen <chenqiuliang@baidu.com>
Date: Wed, 15 Feb 2023 17:05:40 +0800
Subject: [PATCH] make cinn_launch_op run interpretercore in tracing mode to
 reduce number of threads (#50472)

* make cinn_launch_op run interpretercore in tracing mode to reduce number of threads

* skip GetWorkQueue in tracing mode
---
 .../new_executor/interpreter/execution_config.h  |  3 ++-
 .../framework/new_executor/interpretercore.cc    | 16 +++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/interpreter/execution_config.h b/paddle/fluid/framework/new_executor/interpreter/execution_config.h
index 6934723146..4ca33857e8 100644
--- a/paddle/fluid/framework/new_executor/interpreter/execution_config.h
+++ b/paddle/fluid/framework/new_executor/interpreter/execution_config.h
@@ -25,9 +25,10 @@ namespace interpreter {
 
 struct ExecutionConfig {
   bool used_for_jit{false};
-  bool create_local_scope{true};
+  bool used_for_cinn{false};
   bool used_for_control_flow_op{false};
 
+  bool create_local_scope{true};
   size_t host_num_threads;
   size_t deivce_num_threads;
 
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index 2f9fadf730..1f373f7456 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -126,6 +126,7 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
   completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion);
 
   execution_config_.used_for_jit = used_for_jit;
+  execution_config_.used_for_cinn = used_for_cinn;
   execution_config_.used_for_control_flow_op = used_for_control_flow_op;
   execution_config_.create_local_scope =
       !used_for_jit && FLAGS_new_executor_use_local_scope &&
@@ -199,20 +200,21 @@ interpreter::CostInfo InterpreterCore::DryRun(
 }
 
 void InterpreterCore::RunImpl() {
-  // For the program that only run once, it is no need to
-  // create work_queue, so the async_work_queue_ is created
-  // until the second step run.
-  async_work_queue_ = GetWorkQueue();
-
   // lazy initialization of gc, do not create gc is the program only run once
   if (!gc_) {
     gc_ = CreateInterpreterCoreGarbageCollector(place_, vec_instruction_);
   }
 
-  if (execution_config_.used_for_jit && (sync_op_num_ == 0)) {
+  if ((execution_config_.used_for_jit || execution_config_.used_for_cinn) &&
+      (sync_op_num_ == 0)) {
     VLOG(4) << "Tracing Instruction List";
     TraceInstructionList(vec_instruction_);
   } else {
+    VLOG(4) << "Non-tracing";
+    // A program that runs only once does not need a work queue, so
+    // async_work_queue_ is created lazily and is not set up until
+    // the second step runs.
+    async_work_queue_ = GetWorkQueue();
     ExecuteInstructionList(vec_instruction_);
   }
 #ifdef PADDLE_WITH_ASCEND_CL
@@ -409,7 +411,7 @@ std::shared_ptr<interpreter::AsyncWorkQueue> InterpreterCore::GetWorkQueue() {
     async_work_queue_ = std::make_shared<interpreter::AsyncWorkQueue>(
         execution_config_.host_num_threads,
         execution_config_.deivce_num_threads,
-        &main_thread_blocker_);
+        nullptr);
   }
   return async_work_queue_;
 }
-- 
GitLab