未验证 提交 99a4ff8f 编写于 作者: L Leo Chen 提交者: GitHub

[new-exec] support running with different scope and the same program using scope_guard (#43962)

* support scope_guard

* fix test
上级 8279dfea
......@@ -28,44 +28,50 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
scope_(scope) {
// NOTE(zhiqiu): for startup_program, run once ?
if (startup_prog.Block(0).AllOps().size() > 0) {
auto core = GetInterpreterCore(startup_prog, {}, {}, false);
auto core = GetInterpreterCore(scope, startup_prog, {}, {}, false);
VLOG(4) << "StandaloneExecutor: " << this << ", InterpreterCore: " << core;
core->Run({});
}
}
// Runs main_prog_ in the given scope, feeding `feed_tensors` under
// `feed_names` and returning the values named in `fetch_names`.
// NOTE(review): the diff render left the stale pre-change call
// (without `scope`) next to the updated one; only the scope-aware
// call is kept here.
paddle::framework::FetchList StandaloneExecutor::Run(
    Scope* scope,
    const std::vector<std::string>& feed_names,
    const std::vector<framework::LoDTensor>& feed_tensors,
    const std::vector<std::string>& fetch_names) {
  platform::RecordEvent record_event(
      "StandaloneExecutor::run", platform::TracerEventType::UserDefined, 1);

  // Cores are cached per (scope, feed, fetch) key; `true` requests that
  // fetch ops be added so the results can be returned.
  auto core =
      GetInterpreterCore(scope, main_prog_, feed_names, fetch_names, true);

  return core->Run(feed_names, feed_tensors);
}
// Runs main_prog_ in the given scope without explicit feed tensors
// (feed_names are used only as part of the cache key); returns the
// values named in `fetch_names`.
// NOTE(review): removed the stale pre-change GetInterpreterCore call
// (without `scope`) that the diff render duplicated here.
paddle::framework::FetchList StandaloneExecutor::Run(
    Scope* scope,
    const std::vector<std::string>& feed_names,
    const std::vector<std::string>& fetch_names) {
  platform::RecordEvent record_event(
      "StandaloneExecutor::run", platform::TracerEventType::UserDefined, 1);

  // `false`: do not insert fetch ops for this overload.
  auto core =
      GetInterpreterCore(scope, main_prog_, feed_names, fetch_names, false);
  VLOG(4) << "StandaloneExecutor: " << this << ", InterpreterCore: " << core;

  return core->Run(feed_names);
}
// Performs a dry run of main_prog_ in the given scope to collect cost
// information (no fetch results are produced).
// NOTE(review): removed the stale pre-change GetInterpreterCore call
// (without `scope`) that the diff render duplicated here.
framework::interpreter::CostInfo StandaloneExecutor::DryRun(
    Scope* scope,
    const std::vector<std::string>& feed_names,
    const std::vector<framework::LoDTensor>& feed_tensors) {
  auto core = GetInterpreterCore(scope, main_prog_, feed_names, {}, true);

  return core->DryRun(feed_names, feed_tensors);
}
std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
Scope* scope,
const ProgramDesc& prog,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names,
......@@ -79,6 +85,7 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
for (auto& fetchname : fetch_names) {
oss << fetchname << ",";
}
oss << "scope:" << scope;
auto iter = interpretercores_.find(oss.str());
......@@ -89,13 +96,13 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
std::shared_ptr<InterpreterCore> core = nullptr;
if (add_fetch_op) {
core = CreateInterpreterCore(place_, prog, scope_, fetch_names);
core = CreateInterpreterCore(place_, prog, scope, fetch_names);
} else {
core = std::make_shared<InterpreterCore>(
place_,
prog.Block(0),
/*skip_gc_vars=*/std::set<std::string>(),
scope_);
scope);
}
interpretercores_.emplace(oss.str(), core);
return core;
......
......@@ -39,6 +39,7 @@ class StandaloneExecutor {
~StandaloneExecutor() {}
paddle::framework::FetchList Run(
Scope* scope,
const std::vector<std::string>& feed_names,
const std::vector<framework::LoDTensor>& feed_tensors,
const std::vector<std::string>& fetch_names);
......@@ -46,15 +47,18 @@ class StandaloneExecutor {
// NOTE(zhiqiu): feed_names are only used for caching interpretercore.
// fetch_names are used for caching interpretercore and inserting fetch ops,
// the latter can be moved to python side.
paddle::framework::FetchList Run(const std::vector<std::string>& feed_names,
paddle::framework::FetchList Run(Scope* scope,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names);
framework::interpreter::CostInfo DryRun(
Scope* scope,
const std::vector<std::string>& feed_names,
const std::vector<framework::LoDTensor>& feed_tensors);
private:
std::shared_ptr<InterpreterCore> GetInterpreterCore(
Scope* scope,
const ProgramDesc& prog,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names,
......
......@@ -3063,6 +3063,7 @@ All parameter, weight, gradient are variables in Paddle.
Scope *>())
.def("run",
[](StandaloneExecutor &self,
Scope *scope,
const std::unordered_map<std::string, py::array> &input_dict,
std::vector<std::string> fetch_names) {
std::vector<framework::LoDTensor> feed_tensors;
......@@ -3079,12 +3080,13 @@ All parameter, weight, gradient are variables in Paddle.
paddle::framework::FetchList ret;
{
pybind11::gil_scoped_release release;
ret = self.Run(feed_names, feed_tensors, fetch_names);
ret = self.Run(scope, feed_names, feed_tensors, fetch_names);
}
return py::cast(std::move(ret));
})
.def("run",
[](StandaloneExecutor &self,
Scope *scope,
const std::unordered_map<std::string, framework::LoDTensor>
&input_dict,
std::vector<std::string> fetch_names) {
......@@ -3099,23 +3101,25 @@ All parameter, weight, gradient are variables in Paddle.
paddle::framework::FetchList ret;
{
pybind11::gil_scoped_release release;
ret = self.Run(feed_names, feed_tensors, fetch_names);
ret = self.Run(scope, feed_names, feed_tensors, fetch_names);
}
return py::cast(std::move(ret));
})
.def("run",
[](StandaloneExecutor &self,
Scope *scope,
std::vector<std::string> feed_names,
std::vector<std::string> fetch_names) {
paddle::framework::FetchList ret;
{
pybind11::gil_scoped_release release;
ret = self.Run(feed_names, fetch_names);
ret = self.Run(scope, feed_names, fetch_names);
}
return py::cast(std::move(ret));
})
.def("dry_run",
[](StandaloneExecutor &self,
Scope *scope,
const std::unordered_map<std::string, py::array> &input_dict) {
std::vector<framework::LoDTensor> feed_tensors;
std::vector<std::string> feed_names;
......@@ -3131,7 +3135,7 @@ All parameter, weight, gradient are variables in Paddle.
framework::interpreter::CostInfo cost_info;
{
pybind11::gil_scoped_release release;
cost_info = self.DryRun(feed_names, feed_tensors);
cost_info = self.DryRun(scope, feed_names, feed_tensors);
}
return cost_info;
});
......
......@@ -537,7 +537,7 @@ class _StandaloneExecutor(object):
self._scope = scope
self._new_exe = self._create_new_executor()
def run(self, feed_names, fetch_list, return_numpy=True):
def run(self, scope, feed_names, fetch_list, return_numpy=True):
"""
Args:
feed_names(list): This parameter represents the input names of the model.
......@@ -549,7 +549,8 @@ class _StandaloneExecutor(object):
"""
fetch_list = self._check_fetch(fetch_list)
tensors = self._new_exe.run(feed_names, fetch_list)._move_to_list()
tensors = self._new_exe.run(scope, feed_names,
fetch_list)._move_to_list()
if return_numpy:
return as_numpy(tensors, copy=True)
else:
......@@ -1470,7 +1471,8 @@ class Executor(object):
cpu_tensor = _as_lodtensor(data, core.CPUPlace())
tensor._copy_from(cpu_tensor, self.place)
return new_exe.run(list(feed.keys()), fetch_list, return_numpy)
return new_exe.run(scope, list(feed.keys()), fetch_list,
return_numpy)
compiled = isinstance(program, compiler.CompiledProgram)
......
......@@ -50,27 +50,30 @@ class LinearTestCase(unittest.TestCase):
def test_interp_base(self):
    # Build a program, then run it repeatedly through StandaloneExecutor
    # with an explicitly-passed scope (the scope is now a `run` argument).
    # NOTE(review): the diff render duplicated the pre-change calls
    # (without `scope`); only the scope-aware calls are kept here.
    startup_program, main_program, c = self.build_program()
    scope = core.Scope()
    standaloneexecutor = StandaloneExecutor(self.place,
                                            startup_program.desc,
                                            main_program.desc, scope)
    out = standaloneexecutor.run(
        scope, {"a": np.ones([2, 2], dtype="float32") * 2}, [c.name])

    # Repeated runs with varying feed values; fetch only `c`.
    for i in range(10):
        out = standaloneexecutor.run(
            scope, {"a": np.ones([2, 2], dtype="float32") * i}, [c.name])

    # Repeated runs fetching both the feed var and `c`.
    for i in range(10):
        out = standaloneexecutor.run(
            scope, {"a": np.ones([2, 2], dtype="float32") * i},
            ['a', c.name])
def test_dry_run(self):
    # Exercise StandaloneExecutor.dry_run with an explicit scope and
    # validate the returned cost info.
    # NOTE(review): the diff render duplicated the pre-change
    # constructor/dry_run calls (without `scope`); only the scope-aware
    # calls are kept here.
    scope = core.Scope()
    startup_program, main_program, c = self.build_program()
    standaloneexecutor = StandaloneExecutor(self.place,
                                            startup_program.desc,
                                            main_program.desc, scope)

    # test for cost_info
    cost_info = standaloneexecutor.dry_run(
        scope, {"a": np.ones([2, 2], dtype="float32")})
    self.check_cost_info(cost_info)
def check_cost_info(self, cost_info):
......@@ -132,14 +135,15 @@ class ExecutorStatisticsTestCase(unittest.TestCase):
p = core.Place()
p.set_place(self.place)
scope = core.Scope()
executor = StandaloneExecutor(p, startup_program.desc,
main_program.desc, core.Scope())
main_program.desc, scope)
helper_profiler = profiler.Profiler(
targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2))
helper_profiler.start()
for i in range(self.iter_n):
executor.run({}, fetch_list)
executor.run(scope, {}, fetch_list)
helper_profiler.step()
helper_profiler.stop()
......@@ -251,13 +255,15 @@ class MultiStreamModelTestCase(unittest.TestCase):
p = core.Place()
p.set_place(self.place)
scope = core.Scope()
inter_core = StandaloneExecutor(p, startup_program.desc,
main_program.desc, core.Scope())
main_program.desc, scope)
outs = []
for i in range(self.iter_n):
outs.append(
np.array(inter_core.run({}, fetch_list)._move_to_list()[0]))
np.array(
inter_core.run(scope, {}, fetch_list)._move_to_list()[0]))
return outs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册