From 8744f9a083719626c56190672b66eb7ac24d32be Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Mon, 4 Mar 2019 22:54:26 +0800
Subject: [PATCH] fix parallel executor async mode

---
 paddle/fluid/framework/parallel_executor.cc | 10 ++++++++--
 paddle/fluid/framework/parallel_executor.h  |  3 ++-
 paddle/fluid/pybind/pybind.cc               |  2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index c133772e6e8..ae7cd800adb 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -188,7 +188,7 @@ ParallelExecutor::ParallelExecutor(
     const std::string &loss_var_name, Scope *scope,
     const std::vector<Scope *> &local_scopes,
     const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
-    std::vector<ir::Graph *> graphs)
+    ir::Graph *graph)
     : member_(new ParallelExecutorPrivate(places)) {
   member_->global_scope_ = scope;
   member_->use_cuda_ = exec_strategy.use_cuda_;
@@ -218,12 +218,18 @@ ParallelExecutor::ParallelExecutor(
     }
   }

+  std::vector<ir::Graph *> graphs;
   if (build_strategy.async_mode_) {
     PADDLE_ENFORCE(!member_->use_cuda_,
                    "gpu mode does not support async_mode_ now!");
+    graphs.push_back(graph);
+    for (int i = 1; i < places.size(); ++i) {
+      auto *tmp_graph = new ir::Graph(graph->OriginProgram());
+      async_graphs_.emplace_back(tmp_graph);
+      graphs.push_back(tmp_graph);
+    }
   }

-  ir::Graph *graph = graphs[0];
   std::unique_ptr<ir::Graph> temp_owned_graph(graph);

   // FIXME(Yancey1989): parallel graph mode get better performance
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index 0e05b2a460a..987f7150663 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -50,7 +50,7 @@ class ParallelExecutor {
                             const std::vector<Scope *> &local_scopes,
                             const ExecutionStrategy &exec_strategy,
                             const BuildStrategy &build_strategy,
-                            std::vector<ir::Graph *> graphs);
+                            ir::Graph *graph);

   ~ParallelExecutor();

@@ -76,6 +76,7 @@
                                     const BuildStrategy &build_strategy) const;

   ParallelExecutorPrivate *member_;
+  std::vector<std::unique_ptr<ir::Graph>> async_graphs_;
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   std::unique_ptr<ncclUniqueId> local_nccl_id_;
 #endif
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 6d1fc0be232..69cfe280c6b 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -1271,7 +1271,7 @@ All parameter, weight, gradient are variables in Paddle.
   pe.def(py::init<const std::vector<platform::Place> &,
                   const std::unordered_set<std::string> &, const std::string &,
                   Scope *, std::vector<Scope *> &, const ExecutionStrategy &,
-                  const BuildStrategy &, std::vector<ir::Graph *>>())
+                  const BuildStrategy &, ir::Graph *>())
       // NOTE: even we return a vec* to Python use reference policy.
       // We still cannot get local_scope from this vector, since the element
       // of vec will be freed by Python GC. We can only return Scope*
--
GitLab