fix gpu error test=develop

f4f4816b · Qiao Longfei · 12f6b8c3 · f4f4816b · f4f4816b
Showing 2 changed files with 16 additions and 4 deletions

paddle/fluid/framework/details/async_ssa_graph_executor.cc paddle/fluid/framework/details/async_ssa_graph_executor.cc +1 -0

paddle/fluid/framework/parallel_executor.cc paddle/fluid/framework/parallel_executor.cc +15 -4

未找到文件。
--- a/paddle/fluid/framework/details/async_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/async_ssa_graph_executor.cc
@@ -29,6 +29,7 @@ AsyncSSAGraphExecutor::AsyncSSAGraphExecutor(
      graphs_(std::move(graphs)) {
  VLOG(3) << "build AsyncSSAGraphExecutor";
  PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size());
+  PADDLE_ENFORCE_EQ(graphs_.size(), local_scopes_.size());
  // set the correct size of thread pool to each device.
  strategy_.num_threads_ = strategy_.num_threads_ < places_.size()

--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -261,10 +261,21 @@ ParallelExecutor::ParallelExecutor(
  // ncclOp
  std::vector<std::unique_ptr<ir::Graph>> graphs;
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+  if (build_strategy.async_mode_ && !build_strategy.is_distribution_) {
+    VLOG(3) << "use local async mode";
+    for (size_t i = 0; i < member_->places_.size(); ++i) {
+      std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
+          main_program, {member_->places_[i]}, loss_var_name,
+          {member_->local_scopes_[i]}, member_->nranks_, member_->use_cuda_,
+          member_->nccl_ctxs_.get());
+      graphs.push_back(std::move(graph));
+    }
+  } else {
    std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
        main_program, member_->places_, loss_var_name, member_->local_scopes_,
        member_->nranks_, member_->use_cuda_, member_->nccl_ctxs_.get());
    graphs.push_back(std::move(graph));
+  }
 #else
  if (build_strategy.async_mode_ && !build_strategy.is_distribution_) {
    VLOG(3) << "use local async mode";