From 8b2c906af3b14997dfe09ac215b89e711252a706 Mon Sep 17 00:00:00 2001
From: Aurelius84 <zhangliujie@baidu.com>
Date: Wed, 10 Nov 2021 15:01:58 +0800
Subject: [PATCH] Simplify constructor of InterpreterCore (#37072)

* Simplify constructor of InterpreterCore

* fix bool

* clean code
---
 .../framework/new_executor/event_manager.cc   |   9 +-
 .../framework/new_executor/event_manager.h    |  11 +-
 .../framework/new_executor/interpretercore.cc | 102 ++++++++----------
 .../framework/new_executor/interpretercore.h  |  28 +++--
 .../new_executor/new_executor_defs.h          |   6 ++
 .../fluid/framework/new_executor/profiler.h   |  73 +++----------
 .../new_executor/standalone_executor.cc       |  11 +-
 .../new_executor/standalone_executor.h        |   5 +-
 paddle/fluid/pybind/pybind.cc                 |  12 ++-
 9 files changed, 100 insertions(+), 157 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/event_manager.cc b/paddle/fluid/framework/new_executor/event_manager.cc
index 87caff8c57..a45f65d264 100644
--- a/paddle/fluid/framework/new_executor/event_manager.cc
+++ b/paddle/fluid/framework/new_executor/event_manager.cc
@@ -16,9 +16,8 @@
 
 namespace paddle {
 namespace framework {
-
-void EventManager::WaitEvent(const Instruction& instruction,
-                             const platform::Place& place) {
+namespace interpreter {
+void WaitEvent(const Instruction& instruction, const platform::Place& place) {
   // If InterpreterCore in on CPUPlace, do nothing.
   if (platform::is_cpu_place(place)) return;
 
@@ -32,8 +31,7 @@ void EventManager::WaitEvent(const Instruction& instruction,
   }
 }
 
-void EventManager::RecordEvent(const Instruction& instruction,
-                               const platform::Place& place) {
+void RecordEvent(const Instruction& instruction, const platform::Place& place) {
   // If InterpreterCore in on CPUPlace, do nothing.
   if (platform::is_cpu_place(place)) return;
 
@@ -43,5 +41,6 @@ void EventManager::RecordEvent(const Instruction& instruction,
   }
 }
 
+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/event_manager.h b/paddle/fluid/framework/new_executor/event_manager.h
index d23c240469..a949ae1440 100644
--- a/paddle/fluid/framework/new_executor/event_manager.h
+++ b/paddle/fluid/framework/new_executor/event_manager.h
@@ -17,14 +17,11 @@
 
 namespace paddle {
 namespace framework {
+namespace interpreter {
+void RecordEvent(const Instruction& instruction, const platform::Place& place);
 
-class EventManager {
- public:
-  void RecordEvent(const Instruction& instruction,
-                   const platform::Place& place);
-
-  void WaitEvent(const Instruction& instruction, const platform::Place& place);
-};
+void WaitEvent(const Instruction& instruction, const platform::Place& place);
 
+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index ea3e7dd411..89810fd303 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -33,9 +33,9 @@ namespace framework {
 // NOTE(Aurelius84): Need a better strategy to determine it.
 static constexpr size_t kHostNumThreads = 4;
 
-InterpreterCore::InterpreterCore(const platform::Place& place, BlockDesc* block,
-                                 VariableScope* global_scope,
-                                 const std::vector<std::string>& feed_names)
+InterpreterCore::InterpreterCore(const platform::Place& place,
+                                 const BlockDesc& block,
+                                 VariableScope* global_scope)
     : place_(place),
       block_(block),
       global_scope_(global_scope),
@@ -45,8 +45,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place, BlockDesc* block,
       new interpreter::AsyncWorkQueue(kHostNumThreads, &main_thread_blocker_));
   gc_.reset(new InterpreterCoreGarbageCollector());
 
-  feed_names_ = feed_names;
-
   exception_notifier_ = main_thread_blocker_.RegisterEvent(
       kExceptionCaught, [this]() { return exception_holder_.IsCaught(); });
 
@@ -65,27 +63,12 @@ InterpreterCore::~InterpreterCore() {
 }
 
 paddle::framework::FetchList InterpreterCore::Run(
+    const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-  auto FeedInput = [&] {
-    for (size_t i = 0; i < feed_names_.size(); ++i) {
-      auto* feed_var = global_scope_->Var(feed_names_[i]);
-      auto feed_tensor = feed_var->GetMutable<framework::LoDTensor>();
-      feed_tensor->ShareDataWith(feed_tensors[i]);
-      feed_tensor->set_lod(feed_tensors[i].lod());
-    }
-  };
+  bool is_build = is_build_;
+  Prepare(feed_names, feed_tensors, is_build);
 
-  if (is_build_ == false) {
-    paddle::framework::interpreter::build_variable_scope(*block_,
-                                                         global_scope_);
-    FeedInput();
-    paddle::framework::interpreter::build_op_func_list(
-        place_, *block_, &vec_func_list_, global_scope_);
-    is_build_ = true;
-    // convert vec func_list to graph
-    Convert();
-  } else {
-    FeedInput();
+  if (is_build) {
     ExecuteInstructionList(vec_instruction_);
   }
 
@@ -95,9 +78,9 @@ paddle::framework::FetchList InterpreterCore::Run(
 }
 
 void InterpreterCore::Convert() {
+  auto& vec_meta_info = global_scope_->MutableVecMetaInfo();
   auto var_nums = global_scope_->VarSize();
   input_var2op_info_.resize(var_nums);
-  vec_meta_info_.resize(var_nums);
 
   auto op_nums = vec_func_list_.size();
   vec_instruction_.reserve(op_nums);
@@ -136,7 +119,7 @@ void InterpreterCore::Convert() {
     gc_check_input_list.erase(last, gc_check_input_list.end());
 
     for (auto var_id : gc_check_input_list) {
-      vec_meta_info_[var_id].var_ref_count_++;
+      vec_meta_info[var_id].var_ref_count_++;
       instr.AddGCCheckVar(var_id);
     }
   }
@@ -148,7 +131,7 @@ void InterpreterCore::Convert() {
         if (input_var2op_info_.at(id).size() == 0) {
           // output var not be used by any kernel
           vec_instruction_[i].AddGCCheckVar(id);
-          vec_meta_info_[id].var_ref_count_++;
+          vec_meta_info[id].var_ref_count_++;
         }
       }
     }
@@ -180,7 +163,7 @@ void InterpreterCore::Convert() {
   }
 
   for (size_t i = 0; i < vec_instruction_.size(); ++i) {
-    BuildAndCacheInstructionCtx(&vec_instruction_[i], *global_scope_, place_);
+    BuildAndCacheInstructionCtx(&vec_instruction_[i]);
   }
 
   BuildSkipShareLoDInfo();
@@ -248,16 +231,14 @@ void InterpreterCore::BuildInplace() {
   }
 }
 
-void InterpreterCore::BuildAndCacheInstructionCtx(
-    Instruction* instr_node, const VariableScope& var_scope,
-    const platform::Place& place) {
+void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* instr_node) {
   VariableValueMap ins_map;
   for (auto& var_name_item : instr_node->Inputs()) {
     std::vector<Variable*> input_vars;
 
     input_vars.reserve(var_name_item.second.size());
     for (auto& id : var_name_item.second) {
-      input_vars.emplace_back(var_scope.Var(id));
+      input_vars.emplace_back(global_scope_->Var(id));
     }
     ins_map.emplace(var_name_item.first, std::move(input_vars));
   }
@@ -268,7 +249,7 @@ void InterpreterCore::BuildAndCacheInstructionCtx(
 
     out_vars.reserve(var_name_item.second.size());
     for (auto& id : var_name_item.second) {
-      out_vars.emplace_back(var_scope.Var(id));
+      out_vars.emplace_back(global_scope_->Var(id));
     }
     outs_map.emplace(var_name_item.first, std::move(out_vars));
   }
@@ -359,7 +340,7 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
 void InterpreterCore::ExecuteInstructionList(
     const std::vector<Instruction>& vec_instr) {
   async_work_queue_->PrepareAtomicDeps(dependecy_count_);
-  async_work_queue_->PrepareAtomicVarRef(vec_meta_info_);
+  async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
   op_run_number_ = 0;
 
   exception_holder_.Clear();
@@ -452,7 +433,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
     auto& instr_node = vec_instruction_.at(instr_id);
     auto* op = instr_node.OpBase();
     platform::RecordEvent instruction_event(op->Type());
-    event_manager_.WaitEvent(instr_node, place_);
+    interpreter::WaitEvent(instr_node, place_);
 
     try {
       RunInstruction(instr_node);
@@ -479,7 +460,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
       return;
     }
 
-    event_manager_.RecordEvent(instr_node, place_);
+    interpreter::RecordEvent(instr_node, place_);
     op_run_number_.fetch_add(1, std::memory_order_relaxed);
 
     // GC infomation
@@ -508,11 +489,18 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
   }
 }
 
-void InterpreterCore::DryRunPrepare(
-    const std::vector<framework::LoDTensor>& feed_tensors) {
+void InterpreterCore::Prepare(
+    const std::vector<std::string>& feed_names,
+    const std::vector<framework::LoDTensor>& feed_tensors, bool prepare_feed) {
+  PADDLE_ENFORCE_EQ(feed_names.size(), feed_tensors.size(),
+                    platform::errors::PreconditionNotMet(
+                        "Required feed_names.size() == feed_tensors.size(), "
+                        "but received %d != %d",
+                        feed_names.size(), feed_tensors.size()));
+
   auto FeedInput = [&] {
-    for (size_t i = 0; i < feed_names_.size(); ++i) {
-      auto* feed_var = global_scope_->FindVar(feed_names_[i]);
+    for (size_t i = 0; i < feed_names.size(); ++i) {
+      auto* feed_var = global_scope_->FindVar(feed_names[i]);
       PADDLE_ENFORCE_NOT_NULL(feed_var, platform::errors::NotFound(
                                             "feed_var shall not be nullptr."));
 
@@ -522,35 +510,33 @@ void InterpreterCore::DryRunPrepare(
     }
   };
 
-  if (is_build_ == false) {
-    paddle::framework::interpreter::build_variable_scope(*block_,
-                                                         global_scope_);
+  if (!is_build_) {
+    paddle::framework::interpreter::build_variable_scope(block_, global_scope_);
     FeedInput();
     paddle::framework::interpreter::build_op_func_list(
-        place_, *block_, &vec_func_list_, global_scope_);
+        place_, block_, &vec_func_list_, global_scope_);
     is_build_ = true;
     // convert vec func_list to graph
     Convert();
   }
   // NOTE: Because feed_tensor will be GC after
   // paddle::framework::build_op_func_list, so we should
-  // call
-  // FeedInput again.
-  FeedInput();
+  // call FeedInput again.
+  if (prepare_feed) FeedInput();
 }
 
-const CostInfo& InterpreterCore::DryRun(
+interpreter::CostInfo InterpreterCore::DryRun(
+    const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-  DryRunPrepare(feed_tensors);
-  // DryRun may be called many times.
-  dry_run_profiler_.Reset();
-  dry_run_profiler_.Start();
-  ExecuteInstructionList(vec_instruction_);
-  platform::DeviceContextPool::Instance().Get(place_)->Wait();
-
-  dry_run_profiler_.Pause();
-  dry_run_profiler_.TotalCUDAAllocatedMemorySize(place_);
-  return dry_run_profiler_.GetCostInfo();
+  Prepare(feed_names, feed_tensors, true);
+  interpreter::CostInfo cost_info;
+  {
+    interpreter::ProfilerGuard(place_, &cost_info);
+    ExecuteInstructionList(vec_instruction_);
+    platform::DeviceContextPool::Instance().Get(place_)->Wait();
+  }
+
+  return cost_info;
 }
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h
index 3a6876a912..915ae782e2 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
@@ -40,23 +40,23 @@ using AtomicVectorSizeT = std::vector<std::unique_ptr<std::atomic<size_t>>>;
 
 class InterpreterCore {
  public:
-  InterpreterCore(const platform::Place& place, BlockDesc* block,
-                  VariableScope* global_scope,
-                  const std::vector<std::string>& feed_names);
+  InterpreterCore(const platform::Place& place, const BlockDesc& block,
+                  VariableScope* global_scope);
 
   ~InterpreterCore();
 
   paddle::framework::FetchList Run(
+      const std::vector<std::string>& feed_names,
       const std::vector<framework::LoDTensor>& feed_tensors);
 
-  const CostInfo& DryRun(const std::vector<framework::LoDTensor>& feed_tensors);
+  interpreter::CostInfo DryRun(
+      const std::vector<std::string>& feed_names,
+      const std::vector<framework::LoDTensor>& feed_tensors);
 
  private:
   void Convert();
 
-  void BuildAndCacheInstructionCtx(Instruction* instr_node,
-                                   const VariableScope& var_scope,
-                                   const platform::Place& place);
+  void BuildAndCacheInstructionCtx(Instruction* instr_node);
 
   void BuildInplace();
 
@@ -66,7 +66,9 @@ class InterpreterCore {
 
   void ExecuteInstructionList(const std::vector<Instruction>& vec_instr);
 
-  void DryRunPrepare(const std::vector<framework::LoDTensor>& feed_tensors);
+  void Prepare(const std::vector<std::string>& feed_names,
+               const std::vector<framework::LoDTensor>& feed_tensors,
+               bool prepare_feed);
 
   void CheckGC(const Instruction& instr);
 
@@ -79,22 +81,17 @@ class InterpreterCore {
   bool is_build_;
 
   const platform::Place& place_;
-  BlockDesc* block_;             // not owned
+  const BlockDesc& block_;       // not owned
   VariableScope* global_scope_;  // not owned
 
   std::vector<paddle::framework::OpFuncNode> vec_func_list_;
   std::vector<Instruction> vec_instruction_;  // deconstruct before OpFuncNode
 
-  InstructionInfo instruction_info_;
   std::vector<size_t> dependecy_count_;
+  std::atomic<size_t> op_run_number_{0};
   std::vector<std::vector<size_t>> input_var2op_info_;
-  std::vector<VariableMetaInfo> vec_meta_info_;
 
-  std::vector<std::string> feed_names_;
-
-  InterpreterProfiler dry_run_profiler_;
   StreamAnalyzer stream_analyzer_;
-  EventManager event_manager_;
   EventsWaiter main_thread_blocker_;
   std::unique_ptr<interpreter::AsyncWorkQueue> async_work_queue_;
   details::ExceptionHolder exception_holder_;
@@ -102,7 +99,6 @@ class InterpreterCore {
 
   std::unique_ptr<InterpreterCoreGarbageCollector> gc_;
   std::vector<paddle::platform::DeviceEvent> gc_event_;
-  std::atomic<size_t> op_run_number_{0};
 };
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index f4832cb283..37fb57072f 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -607,6 +607,12 @@ class VariableScope : public ScopeBase {
         platform::errors::NotFound("%s not in VariableScope.", name));
   }
 
+  std::vector<VariableMetaInfo>& MutableVecMetaInfo() { return vec_meta_info_; }
+
+  const std::vector<VariableMetaInfo>& VecMetaInfo() const {
+    return vec_meta_info_;
+  }
+
  private:
   std::vector<Variable*> var_list_;
   std::map<std::string, int> name2id_;
diff --git a/paddle/fluid/framework/new_executor/profiler.h b/paddle/fluid/framework/new_executor/profiler.h
index 77783535b6..51c9e3d66a 100644
--- a/paddle/fluid/framework/new_executor/profiler.h
+++ b/paddle/fluid/framework/new_executor/profiler.h
@@ -20,84 +20,41 @@
 
 namespace paddle {
 namespace framework {
-
-static void GetTensors(Variable* var, std::unordered_set<Tensor*>* tensor_set) {
-  if (var->IsType<LoDTensor>() && var->Get<LoDTensor>().IsInitialized()) {
-    tensor_set->insert(var->GetMutable<LoDTensor>());
-  } else if (var->IsType<SelectedRows>() &&
-             var->Get<SelectedRows>().value().IsInitialized()) {
-    tensor_set->insert(var->GetMutable<SelectedRows>()->mutable_value());
-  } else if (var->IsType<LoDTensorArray>()) {
-    auto* tensor_arr = var->GetMutable<LoDTensorArray>();
-    for (auto& t : *tensor_arr) {
-      if (t.IsInitialized()) {
-        tensor_set->insert(&t);
-      }
-    }
-  }
-}
-
-static std::pair<size_t, size_t> GetTensorMemorySize(
-    const std::vector<Variable*>& var_list) {
-  std::unordered_set<Tensor*> tensor_set;
-  for (auto* var : var_list) {
-    GetTensors(var, &tensor_set);
-  }
-  size_t host_memory_bytes = 0;
-  size_t device_memory_bytes = 0;
-  std::unordered_set<memory::Allocation*> allocation_set;
-  for (auto* tensor : tensor_set) {
-    auto allocation = tensor->Holder().get();
-    if (!allocation_set.count(allocation)) {
-      allocation_set.insert(allocation);
-      if (platform::is_cuda_pinned_place(tensor->place()) ||
-          platform::is_cpu_place(tensor->place())) {
-        VLOG(3) << "found host memory : " << allocation->size();
-        host_memory_bytes += allocation->size();
-      } else {
-        VLOG(3) << "found device memory : " << allocation->size();
-        device_memory_bytes += allocation->size();
-      }
-    }
-  }
-  return {host_memory_bytes, device_memory_bytes};
-}
-
+namespace interpreter {
 struct CostInfo {
   double total_time{0.};          // ms
   size_t device_memory_bytes{0};  // total allocated memory size
 };
 
-class InterpreterProfiler {
+class ProfilerGuard {
  public:
-  void Start() { timer_.Start(); }
-
-  void Pause() {
-    timer_.Pause();
-    cost_info_.total_time += timer_.ElapsedMS();
+  ProfilerGuard(const platform::Place& place, CostInfo* cost_info)
+      : place_(place), cost_info_(cost_info) {
+    timer_.Start();
   }
 
-  void Reset() {
-    timer_.Reset();
-    cost_info_.total_time = 0.;
-    cost_info_.device_memory_bytes = 0;
+  ~ProfilerGuard() {
+    timer_.Pause();
+    cost_info_->total_time += timer_.ElapsedMS();
+    TotalCUDAAllocatedMemorySize(place_);
   }
 
+ private:
   void TotalCUDAAllocatedMemorySize(const platform::Place& place) {
     if (platform::is_gpu_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       auto cuda_place = BOOST_GET_CONST(platform::CUDAPlace, place);
-      cost_info_.device_memory_bytes =
+      cost_info_->device_memory_bytes =
           platform::RecordedCudaMallocSize(cuda_place.device);
 #endif
     }
   }
 
-  const CostInfo& GetCostInfo() const { return cost_info_; }
-
- private:
+  const platform::Place& place_;
+  CostInfo* cost_info_;
   platform::Timer timer_;
-  CostInfo cost_info_;
 };
+
+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc
index 6d3b531c50..e3bcbaec7a 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor.cc
@@ -51,16 +51,15 @@ paddle::framework::FetchList StandaloneExecutor::Run(
     const std::vector<std::string>& fetch_names) {
   auto core = GetInterpreterCore(feed_names, fetch_names);
 
-  return core->Run(feed_tensors);
+  return core->Run(feed_names, feed_tensors);
 }
 
-const CostInfo& StandaloneExecutor::DryRun(
+framework::interpreter::CostInfo StandaloneExecutor::DryRun(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
   auto core = GetInterpreterCore(feed_names, {});
 
-  auto& cost_info = core->DryRun(feed_tensors);
-  return cost_info;
+  return core->DryRun(feed_names, feed_tensors);
 }
 
 void StandaloneExecutor::BuildVariableOuterScope(
@@ -102,8 +101,8 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
     auto* block = new_prog->MutableBlock(0);
     interpreter::add_fetch(fetch_names, block);
 
-    auto core = std::make_shared<InterpreterCore>(place_, block, &global_scope_,
-                                                  feed_names);
+    auto core =
+        std::make_shared<InterpreterCore>(place_, *block, &global_scope_);
     programs_.emplace(oss.str(), new_prog);
     interpretercores_.emplace(oss.str(), core);
     return core;
diff --git a/paddle/fluid/framework/new_executor/standalone_executor.h b/paddle/fluid/framework/new_executor/standalone_executor.h
index 6d48a85142..1fbdf7b4b0 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor.h
+++ b/paddle/fluid/framework/new_executor/standalone_executor.h
@@ -45,8 +45,9 @@ class StandaloneExecutor : public ExecutorBase {
       const std::vector<framework::LoDTensor>& feed_tensors,
       const std::vector<std::string>& fetch_names);
 
-  const CostInfo& DryRun(const std::vector<std::string>& feed_names,
-                         const std::vector<framework::LoDTensor>& feed_tensors);
+  framework::interpreter::CostInfo DryRun(
+      const std::vector<std::string>& feed_names,
+      const std::vector<framework::LoDTensor>& feed_tensors);
 
  private:
   void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 050a25c478..a5b0b1cd2a 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2069,11 +2069,13 @@ All parameter, weight, gradient are variables in Paddle.
                  fetch_vars);
       });
 
-  py::class_<framework::CostInfo>(m, "CostInfo")
+  py::class_<framework::interpreter::CostInfo>(m, "CostInfo")
       .def(py::init<>())
-      .def("total_time", [](CostInfo &self) { return self.total_time; })
-      .def("device_memory_bytes",
-           [](CostInfo &self) { return self.device_memory_bytes; });
+      .def("total_time",
+           [](interpreter::CostInfo &self) { return self.total_time; })
+      .def("device_memory_bytes", [](interpreter::CostInfo &self) {
+        return self.device_memory_bytes;
+      });
 
   py::class_<framework::StandaloneExecutor>(m, "StandaloneExecutor")
       .def(py::init<const platform::Place &, const ProgramDesc &,
@@ -2134,7 +2136,7 @@ All parameter, weight, gradient are variables in Paddle.
                feed_tensors.push_back(t);
              }
 
-             CostInfo cost_info;
+             framework::interpreter::CostInfo cost_info;
              {
                pybind11::gil_scoped_release release;
                cost_info = self.DryRun(feed_names, feed_tensors);
-- 
GitLab