code refactoring for new executor (#34970)

* code refactoring, test=develop * refine, test=develop * refine, test=develop * refine, test=develop

code refactoring for new executor (#34970)
* code refactoring, test=develop * refine, test=develop * refine, test=develop * refine, test=develop
40d4d834 · wanghuancoder · GitHub · 1b747de7 · 40d4d834 · 40d4d834
12 changed files
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -25,6 +25,7 @@ add_subdirectory(ir)
 add_subdirectory(details)
 add_subdirectory(fleet)
 add_subdirectory(io)
+add_subdirectory(new_executor)
 #ddim lib
 proto_library(framework_proto SRCS framework.proto)


--- a/paddle/fluid/framework/new_executor/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/CMakeLists.txt
+# Interpreter core library of the new executor, built from interpretercore.cc.
+cc_library(interpretercore SRCS interpretercore.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
+# Standalone executor library, built from standalone_executor.cc; depends on
+# interpretercore in addition to the same operator/profiler dependencies.
+cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
+
+# Test binary for the standalone executor; currently disabled (commented out).
+# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
--- a/paddle/fluid/framework/new_exec.h
+++ b/paddle/fluid/framework/new_exec.h
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <map>
+#include <queue>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/new_executor/interpretercore_util.h"
+#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace framework {
+
+// Interpreter for a single ProgramDesc, working on a caller-provided
+// VariableScope.
+//
+// Lifetime: `main_prog` is kept by reference and `global_scope` by raw
+// pointer, so the caller must keep both alive for as long as this object is
+// used. The place is stored by value so that the interpreter never refers to
+// a Place object owned by the caller (which may be a temporary).
+class InterpreterCore {
+ public:
+  // place        device place handed to the instruction-running helpers.
+  // main_prog    program to interpret (held by reference).
+  // global_scope variable table shared with the owner (not owned here).
+  // feed_names / fetch_names
+  //              names associated with the feed / fetch tensors of Run().
+  InterpreterCore(const platform::Place& place, const ProgramDesc& main_prog,
+                  VariableScope* global_scope,
+                  const std::vector<std::string>& feed_names,
+                  const std::vector<std::string>& fetch_names);
+
+  // Runs the program once. `feed_tensors` are the inputs and `fetch_tensors`
+  // is the output container.
+  // NOTE(review): presumably feed_tensors[i] corresponds to feed_names[i]
+  // given at construction -- confirm against interpretercore.cc.
+  void Run(const std::vector<framework::Tensor>& feed_tensors,
+           std::vector<framework::Tensor>* fetch_tensors);
+
+  // Static helper, also used by StandaloneExecutor for the startup program:
+  // fills `op_list` and `vec_func_list` from `pdesc`, using `var_scope` and
+  // `place`. Defined in interpretercore.cc.
+  static void BuildOpFuncList(const platform::Place& place,
+                              const framework::ProgramDesc& pdesc,
+                              std::vector<OperatorBase*>* op_list,
+                              std::vector<OpFuncNode>* vec_func_list,
+                              VariableScope* var_scope);
+
+ private:
+  void Convert();
+
+  void RunInstruction(const Instruction& instr_node,
+                      const VariableScope& var_scope,
+                      const platform::Place& place);
+
+  void ExecuteInstructionList(const std::vector<Instruction>& vec_instr,
+                              const VariableScope& var_scope,
+                              const platform::Place& place);
+
+  std::vector<size_t> MergeVector(const std::vector<size_t>& first,
+                                  const std::vector<size_t>& second);
+
+  void BuildVariableScope(const framework::ProgramDesc& pdesc,
+                          VariableScope* var_scope);
+
+  // Stored by value: a reference member would dangle whenever the caller's
+  // Place does not outlive this object.
+  platform::Place place_;
+  const ProgramDesc& main_program_;  // not owned; must outlive this object
+  VariableScope* global_scope_;      // not owned
+  std::vector<VariableMetaInfo> vec_meta_info_;
+
+  std::vector<paddle::framework::OpFuncNode> vec_func_list_;
+  std::vector<paddle::framework::OperatorBase*> op_list_;
+
+  std::vector<Instruction> vec_instruction_;
+  InstructionInfo instruction_info_;
+  std::vector<size_t> dependecy_count_;
+  std::vector<VariableMetaInfo> ref_coun_info_;
+  std::vector<std::vector<size_t>> input_var2op_info_;
+
+  bool is_build_;
+
+  std::vector<std::string> feed_names_;
+  std::vector<std::string> fetch_names_;
+};
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/new_exec_util.h
+++ b/paddle/fluid/framework/new_exec_util.h
@@ -13,7 +13,7 @@
 // limitations under the License.

 /*************************************************************************
-  > File Name: new_exec_util.h
+  > File Name: interpretercore_util.h
  > Author: guanshanshan@baidu.com
  > Created Time: Fri 23 Jul 2021 06:19:19 AM UTC
 ************************************************************************/

--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/operator.h"
+
+namespace paddle {
+namespace framework {
+
+// Callable that runs one op kernel given its ExecutionContext.
+using OpKernelComputeFunc = std::function<void(const ExecutionContext&)>;
+// Table from kernel type to compute function, hashed with OpKernelType::Hash.
+using OpKernelMap =
+    std::unordered_map<OpKernelType, OpKernelComputeFunc, OpKernelType::Hash>;
+
+// Bundles a kernel compute function with an operator pointer.
+struct OpKernelFunc {
+  OpKernelComputeFunc compute_func_;
+  // Raw pointer, not initialised here.
+  // NOTE(review): presumably non-owning -- confirm who deletes the operator.
+  OperatorBase* operator_base_;
+};
+
+// Per-variable bookkeeping record; currently holds a single counter.
+struct VariableMetaInfo {
+  // Presumably the number of outstanding references to the variable -- TODO
+  // confirm against interpretercore.cc. Not initialised by this struct.
+  int var_ref_count_;
+};
+
+// Flat variable table: variables are stored in `var_list` and looked up by
+// name through `name2id`.
+struct VariableScope {
+  // Variable pointers; this struct does not delete them.
+  std::vector<Variable*> var_list;
+  // Variable name -> index into `var_list` (StandaloneExecutor assigns
+  // var_list.size() at the moment of registration).
+  std::map<std::string, int> name2id;
+};
+
+// Successor information attached to an Instruction.
+struct NextInstruction {
+  // NOTE(review): presumably indices of instructions that can be run
+  // directly after this one -- confirm against interpretercore.cc.
+  std::vector<size_t> direct_run_;
+};
+
+// Empty type; referenced by EventRun and Instruction.
+struct EventInter {};
+
+// Aggregate information about an instruction list.
+struct InstructionInfo {
+  // Spelling "dependecy" is part of the member name and kept as is.
+  // NOTE(review): presumably one dependency counter per instruction --
+  // confirm against interpretercore.cc.
+  std::vector<size_t> dependecy_count_;
+};
+
+// Groups an EventInter with two index lists.
+// NOTE(review): the member names suggest "instructions to run on the same
+// device" and "instructions requiring synchronisation"; nothing in this
+// header establishes that -- confirm before relying on it.
+struct EventRun {
+  EventInter event_inter;
+  std::vector<size_t> same_device_run_;
+  std::vector<size_t> synchronized_run;
+};
+
+// One executable step: a kernel function plus the tables it needs.
+struct Instruction {
+  OpKernelFunc kernel_func_;
+  // Slot name -> list of ints.
+  // NOTE(review): presumably the ints are VariableScope ids (indices into
+  // var_list) -- confirm against interpretercore.cc.
+  std::map<std::string, std::vector<int>> input_index_;
+  std::map<std::string, std::vector<int>> output_index_;
+
+  // NOTE(review): presumably variable ids to examine for garbage collection
+  // after this instruction runs -- confirm.
+  std::vector<size_t> gc_check_var_list;
+  NextInstruction next_instruction_;
+  std::vector<EventInter> vec_event_list_;
+};
+
+// Entry produced by InterpreterCore::BuildOpFuncList: input/output index
+// tables plus the kernel function to call.
+struct OpFuncNode {
+  // int unsed;
+  // Slot name -> list of ints (presumably VariableScope ids -- TODO confirm).
+  std::map<std::string, std::vector<int>> input_index;
+  std::map<std::string, std::vector<int>> output_index;
+
+  OpKernelComputeFunc kernel_func_;
+};
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/new_executor/standalone_executor.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor.cc
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/framework/new_executor/standalone_executor.h"
+
+namespace paddle {
+namespace framework {
+// Sets up the executor:
+//   1. initialises devices,
+//   2. creates every variable of the startup program inside `scope` and
+//      registers it in global_scope_,
+//   3. registers any further variables that already live in `scope`,
+//   4. builds the op function list of the startup program.
+//
+// `startup_prog` and `main_prog` are kept by reference and `scope` by raw
+// pointer; the caller must keep them alive while the executor is in use.
+StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
+                                       const ProgramDesc& startup_prog,
+                                       const ProgramDesc& main_prog,
+                                       Scope* scope)
+    : place_(place),
+      startup_prog_(startup_prog),
+      main_prog_(main_prog),
+      outer_scope_(scope) {
+  paddle::framework::InitDevices();
+
+  // init scope
+  // NOTE(review): BuildVariableOuterScope dereferences `scope`
+  // unconditionally, so a null scope already fails here, before the
+  // nullptr check below is reached.
+  BuildVariableOuterScope(startup_prog, &global_scope_, scope);
+
+  if (outer_scope_ != nullptr) {
+    // Snapshot of the names, taken before the loop touches the scope.
+    const auto name_list = outer_scope_->LocalVarNames();
+    for (const auto& name : name_list) {
+      if (global_scope_.name2id.find(name) != global_scope_.name2id.end()) {
+        // Already registered (e.g. by BuildVariableOuterScope above).
+        // Appending it again would leave an orphan entry in var_list that
+        // no name maps to.
+        continue;
+      }
+
+      // Keep name2id and var_list in lock-step: the id is the index of the
+      // entry appended right after.
+      global_scope_.name2id[name] = global_scope_.var_list.size();
+      global_scope_.var_list.push_back(outer_scope_->Var(name));
+    }
+  }
+
+  // run startup program
+  std::vector<paddle::framework::OpFuncNode> vec_func_list;
+  std::vector<paddle::framework::OperatorBase*> op_list;
+  InterpreterCore::BuildOpFuncList(place_, startup_prog, &op_list,
+                                   &vec_func_list, &global_scope_);
+}
+
+// Runs the main program once for the given feed/fetch name sets.
+// The InterpreterCore for this name combination is looked up (or created) via
+// GetInterpreterCore and then executed. Always returns 0.
+int StandaloneExecutor::Run(const std::vector<std::string>& feed_names,
+                            const std::vector<framework::Tensor>& feed_tensors,
+                            const std::vector<std::string>& fetch_names,
+                            std::vector<framework::Tensor>* fetch_tensors) {
+  GetInterpreterCore(feed_names, fetch_names)
+      ->Run(feed_tensors, fetch_tensors);
+  return 0;
+}
+
+// Registers every variable of block 0 of `pdesc` in `var_scope`.
+// For each name not yet in var_scope->name2id, the variable is obtained from
+// `outer_scope` via Var(), initialised to the declared type and appended to
+// var_scope->var_list. Variables named kEmptyVarName are skipped.
+// `outer_scope` is dereferenced without a null check.
+void StandaloneExecutor::BuildVariableOuterScope(
+    const framework::ProgramDesc& pdesc, VariableScope* var_scope,
+    Scope* outer_scope) {
+  for (auto& var_desc : pdesc.Block(0).AllVars()) {
+    const std::string var_name = var_desc->Name();
+
+    const bool is_placeholder = (var_name == framework::kEmptyVarName);
+    if (is_placeholder || var_scope->name2id.count(var_name) != 0) {
+      continue;
+    }
+
+    // The id is reserved first; it equals the index of the entry appended at
+    // the end of this iteration.
+    var_scope->name2id[var_name] = var_scope->var_list.size();
+    auto* created = outer_scope->Var(var_name);
+    InitializeVariable(created, var_desc->GetType());
+    var_scope->var_list.push_back(created);
+  }
+}
+
+// Returns the InterpreterCore cached for this feed/fetch name combination,
+// creating and caching a new one on first use.
+// The cache key has the form "feed:<n1>,<n2>,...fetch:<m1>,<m2>,...".
+std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
+    const std::vector<std::string>& feed_names,
+    const std::vector<std::string>& fetch_names) {
+  std::ostringstream key_builder;
+  key_builder << "feed:";
+  for (const auto& name : feed_names) {
+    key_builder << name << ",";
+  }
+  key_builder << "fetch:";
+  for (const auto& name : fetch_names) {
+    key_builder << name << ",";
+  }
+  const std::string cache_key = key_builder.str();
+
+  auto cached = interpretercores_.find(cache_key);
+  if (cached != interpretercores_.end()) {
+    return cached->second;
+  }
+
+  auto fresh_core = std::make_shared<InterpreterCore>(
+      place_, main_prog_, &global_scope_, feed_names, fetch_names);
+  interpretercores_.emplace(cache_key, fresh_core);
+  return fresh_core;
+}
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/new_executor/standalone_executor.h
+++ b/paddle/fluid/framework/new_executor/standalone_executor.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/new_executor/interpretercore.h"
+
+namespace paddle {
+namespace framework {
+
+// Abstract executor interface: a single Run() entry point taking feed names
+// and tensors and filling `fetch_tensors` for the requested fetch names.
+class ExecutorBase {
+ public:
+  virtual ~ExecutorBase() = default;
+
+  // Executes once; the int result is implementation-defined
+  // (StandaloneExecutor always returns 0).
+  virtual int Run(const std::vector<std::string>& feed_names,
+                  const std::vector<framework::Tensor>& feed_tensors,
+                  const std::vector<std::string>& fetch_names,
+                  std::vector<framework::Tensor>* fetch_tensors) = 0;
+};
+
+// Executor that prepares a variable scope from a startup program and then
+// runs a main program through cached InterpreterCore instances (one per
+// distinct feed/fetch name combination).
+//
+// Lifetime: `startup_prog`, `main_prog` and `scope` are not copied; the
+// caller must keep them alive while the executor is in use. The place IS
+// copied, see place_ below.
+class StandaloneExecutor : public ExecutorBase {
+ public:
+  StandaloneExecutor(const platform::Place& place,
+                     const ProgramDesc& startup_prog,
+                     const ProgramDesc& main_prog, Scope* scope);
+
+  ~StandaloneExecutor() {}
+
+  // Runs the main program once. Always returns 0.
+  int Run(const std::vector<std::string>& feed_names,
+          const std::vector<framework::Tensor>& feed_tensors,
+          const std::vector<std::string>& fetch_names,
+          std::vector<framework::Tensor>* fetch_tensors) override;
+
+ private:
+  // Creates the variables of block 0 of `pdesc` in `outer_scope` and
+  // registers them in `var_scope`.
+  void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
+                               VariableScope* var_scope, Scope* outer_scope);
+
+  // Returns the cached core for this name combination, creating it on
+  // first use.
+  std::shared_ptr<InterpreterCore> GetInterpreterCore(
+      const std::vector<std::string>& feed_names,
+      const std::vector<std::string>& fetch_names);
+
+  // Stored by value. Callers commonly pass a concrete place such as
+  // CUDAPlace, which is implicitly converted to a temporary platform::Place
+  // for the constructor call; a reference member would be left dangling as
+  // soon as the constructor returns.
+  platform::Place place_;
+  const ProgramDesc& startup_prog_;  // not owned
+  const ProgramDesc& main_prog_;     // not owned
+  Scope* outer_scope_;               // not owned
+  VariableScope global_scope_;
+
+  // Cache key (built from feed/fetch names) -> interpreter core.
+  std::unordered_map<std::string, std::shared_ptr<InterpreterCore>>
+      interpretercores_;
+};
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/new_exec_test.cc
+++ b/paddle/fluid/framework/new_exec_test.cc
@@ -21,68 +21,44 @@
 #include <unordered_map>
 #include <vector>

-#include "paddle/fluid/framework/executor_gc_helper.h"
-#include "paddle/fluid/framework/garbage_collector.h"
-#include "paddle/fluid/framework/op_info.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/variable.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/framework/new_executor/standalone_executor.h"

-#include "paddle/fluid/pybind/pybind.h"
+paddle::framework::ProgramDesc load_from_file(const std::string& file_name) {
+  std::ifstream fin(file_name, std::ios::in | std::ios::binary);
+  fin.seekg(0, std::ios::end);
+  std::string buffer(fin.tellg(), ' ');
+  fin.seekg(0, std::ios::beg);
+  fin.read(&buffer[0], buffer.size());
+  fin.close();

-#include "gperftools/profiler.h"
-#include "paddle/fluid/framework/new_exec.h"
-#include "paddle/fluid/platform/init.h"
+  paddle::framework::ProgramDesc program_desc(buffer);
+  return program_desc;
+}

 int main() {
  paddle::framework::InitDevices();
-  paddle::framework::VariableScope global_scope;
  auto place = paddle::platform::CUDAPlace(0);
-  auto test_prog = paddle::framework::load_from_file("lm_startup_program");
-  {
-    paddle::framework::build_variable_scope(test_prog, &global_scope);
-
-    std::vector<paddle::framework::OpFuncNode> vec_func_list;
-    std::vector<paddle::framework::OperatorBase*> op_list;
-    paddle::framework::build_op_func_list(test_prog, op_list, vec_func_list,
-                                          &global_scope, place);
-
-    // paddle::framework::exec_op_func_list( vec_func_list, op_list,
-    // global_scope, place );
-  }
+  auto test_prog = load_from_file("lm_startup_program");

-  cerr << "run main" << endl;
-  auto main_prog = paddle::framework::load_from_file("lm_main_program");
+  auto main_prog = load_from_file("lm_main_program");

-  paddle::framework::build_variable_scope(main_prog, &global_scope);
-
-  std::vector<paddle::framework::OpFuncNode> vec_main_func_list;
-  std::vector<paddle::framework::OperatorBase*> op_main_list;
-  paddle::framework::build_op_func_list(
-      main_prog, op_main_list, vec_main_func_list, &global_scope, place);
  paddle::framework::Scope scope;
-  paddle::framework::InterpreterCore interp_core(place, main_prog, test_prog,
-                                                 &scope);
+  paddle::framework::StandaloneExecutor exec(place, test_prog, main_prog,
+                                             &scope);
+
  auto start = std::chrono::steady_clock::now();
-  ProfilerStart("new_executor.prof");
  for (size_t i = 0; i < 2320; ++i) {
    if (i % 200 == 0) {
-      cerr << i << endl;
+      std::cout << i << std::endl;
    }
-    // paddle::framework::exec_op_func_list( vec_main_func_list, op_main_list,
-    // global_scope, place );
+
    std::vector<paddle::framework::Tensor> vec_out;
-    interp_core.run({}, {}, {}, vec_out);
+    exec.Run({}, {}, {}, &vec_out);
  }
-  ProfilerStop();
  auto end = std::chrono::steady_clock::now();
  std::chrono::duration<double> diff = end - start;

-  cerr << "time cost " << diff.count() << endl;
+  std::cout << "time cost " << diff.count() << std::endl;

  return 1;
 }
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -200,6 +200,7 @@ if(WITH_PYTHON)
  endif(WIN32)
  add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file})

+  list(APPEND PYBIND_DEPS interpretercore standalone_executor)
  cc_library(paddle_pybind SHARED
    SRCS ${PYBIND_SRCS}
    DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})

--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -42,7 +42,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
-#include "paddle/fluid/framework/new_exec.h"
+#include "paddle/fluid/framework/new_executor/standalone_executor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
@@ -1945,30 +1945,30 @@ All parameter, weight, gradient are variables in Paddle.
                 fetch_vars);
      });

-  py::class_<framework::InterpreterCore>(m, "InterpreterCore")
+  py::class_<framework::StandaloneExecutor>(m, "StandaloneExecutor")
      .def(py::init<const platform::Place &, const ProgramDesc &,
                    const ProgramDesc &, Scope *>())
      .def("run",
-           [](InterpreterCore &self,
+           [](StandaloneExecutor &self,
              const std::unordered_map<std::string, py::array> &input_dict,
-              std::vector<std::string> vec_fetch_name) {
+              std::vector<std::string> fetch_names) {
             pybind11::gil_scoped_release release;
-             std::vector<framework::Tensor> vec_tensor;
-             std::vector<std::string> vec_name;
+             std::vector<framework::Tensor> feed_tensors;
+             std::vector<std::string> feed_names;

             for (auto &item : input_dict) {
               framework::LoDTensor t;
               SetTensorFromPyArray<platform::CPUPlace>(
                   &t, item.second, platform::CPUPlace(), false);
-               vec_name.push_back(item.first);
-               vec_tensor.push_back(t);
+               feed_names.push_back(item.first);
+               feed_tensors.push_back(t);
             }

-             std::vector<framework::Tensor> vec_out;
-             self.run(vec_name, vec_tensor, vec_fetch_name, &vec_out);
+             std::vector<framework::Tensor> fetch_tensors;
+             self.Run(feed_names, feed_tensors, fetch_names, &fetch_tensors);
             std::vector<py::array> vec_ret;
-             for (size_t i = 0; i < vec_out.size(); ++i) {
-               vec_ret.push_back(TensorToPyArray(vec_out[i], true));
+             for (size_t i = 0; i < fetch_tensors.size(); ++i) {
+               vec_ret.push_back(TensorToPyArray(fetch_tensors[i], true));
             }
             return vec_ret;
           });

--- a/python/paddle/fluid/tests/unittests/interpreter/test_interpreter.py
+++ b/python/paddle/fluid/tests/unittests/interpreter/test_interpreter.py
@@ -15,7 +15,7 @@
 import unittest
 import paddle
 from paddle.fluid import core
-from paddle.fluid.core import InterpreterCore
+from paddle.fluid.core import StandaloneExecutor

 import numpy as np

@@ -37,19 +37,25 @@ class LinearTestCase(unittest.TestCase):
        startup_program = paddle.fluid.default_startup_program()
        p = core.Place()
        p.set_place(self.place)
-        inter_core = InterpreterCore(p, main_program.desc, startup_program.desc,
-                                     core.Scope())
+        standaloneexecutor = StandaloneExecutor(p, startup_program.desc,
+                                                main_program.desc, core.Scope())

-        out = inter_core.run({
+        out = standaloneexecutor.run({
            "a": np.ones(
                [2, 2], dtype="float32") * 2
        }, [c.name])
        for i in range(10):
-            out = inter_core.run({
+            out = standaloneexecutor.run({
                "a": np.ones(
                    [2, 2], dtype="float32") * i
            }, [c.name])

+        for i in range(10):
+            out = standaloneexecutor.run({
+                "a": np.ones(
+                    [2, 2], dtype="float32") * i
+            }, [a.name, c.name])
+

 if __name__ == "__main__":
    unittest.main()