Unverified commit 453a49b1, authored by wopeizl, committed by GitHub

Make ParallelExecutor support Windows GPU (#17787)

* fix ParallelExecutor on Windows
  test=develop
* restrict to using only one GPU under Windows
Parent 39bc8a55
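For context, this is roughly how the new restriction surfaces from the Python side. A minimal sketch, assuming the Paddle 1.x fluid API of this era; the toy program is illustrative only and not part of the commit:

    import os
    # Under Windows, expose a single GPU to the process; the PADDLE_ENFORCE
    # added below rejects ParallelExecutor construction with places.size() > 1.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    import paddle.fluid as fluid

    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.layers.data(name='x', shape=[4], dtype='float32')
        loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    fluid.Executor(fluid.CUDAPlace(0)).run(startup_prog)
    # With more than one visible GPU on Windows, this would now fail with
    # "Windows can support Single GPU only."
    pe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name,
                                main_program=main_prog)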
@@ -296,6 +296,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
   member_->use_all_reduce_ =
       build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
   member_->nranks_ = build_strategy.num_trainers_ * places.size();
+#if defined(PADDLE_WITH_CUDA) && defined(_WIN32)
+  if (member_->use_cuda_) {
+    PADDLE_ENFORCE(places.size() == 1, "Windows can support Single GPU only.");
+  }
+#endif
   if (!member_->use_all_reduce_) {
     PADDLE_ENFORCE(places.size() > 1,
                    "If you set build_strategy.reduce with 'Reduce',"
@@ -361,8 +366,6 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
           member_->nccl_ctxs_.DefaultFlatCtx()->at(member_->places_[dev_id]);
       dev_ctx->set_nccl_comm(nccl_ctx.comm());
     }
-#else
-    PADDLE_THROW("Not compiled with CUDA");
 #endif
   }
   // broadcast parameters from the 0th device to others:
@@ -544,8 +547,6 @@ void ParallelExecutor::BCastParamsToDevices(
         }
         nccl_ctxs->WaitAll();
       }
-#else
-      PADDLE_THROW("Not compiled with CUDA");
 #endif
     } else {
       platform::CPUPlace cpu;
@@ -650,7 +651,9 @@ ParallelExecutor::~ParallelExecutor() {
 bool ParallelExecutor::EnableParallelGraphExecution(
     const ir::Graph &graph, const ExecutionStrategy &exec_strategy,
     const BuildStrategy &build_strategy) const {
-  if (!FLAGS_enable_parallel_graph) return false;
+  if (!FLAGS_enable_parallel_graph) {
+    return false;
+  }

   bool enable_parallel_graph = true;
@@ -670,11 +673,19 @@ bool ParallelExecutor::EnableParallelGraphExecution(
     }
   }

-  if (!member_->use_all_reduce_ || !member_->use_cuda_)
+  if (!member_->use_all_reduce_ || !member_->use_cuda_) {
     if (build_strategy.enable_sequential_execution_ ||
-        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
+        exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) {
       enable_parallel_graph = false;
+    }
+  }
+
+#ifdef WIN32
+  VLOG(1) << "Windows has no support to parallel graph, enable_parallel_graph "
+             "would be forced to false.";
+  enable_parallel_graph = false;
+#endif
+
   return enable_parallel_graph;
 }
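For reference, FLAGS_enable_parallel_graph is ordinarily switched on through the environment before fluid bootstraps; after this change the #ifdef WIN32 block above overrides it on Windows regardless. A hedged sketch, assuming the flag sits on fluid's environment-flag whitelist (not shown in this diff):

    import os
    # Illustrative only: seed the gflag before importing paddle.fluid.
    # On Windows this setting is now ignored, because
    # EnableParallelGraphExecution forces enable_parallel_graph to false.
    os.environ['FLAGS_enable_parallel_graph'] = '1'
    import paddle.fluid as fluid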
...
@@ -1341,6 +1341,9 @@ All parameter, weight, gradient are variables in Paddle.
           "num_trainers",
           [](const BuildStrategy &self) { return self.num_trainers_; },
           [](BuildStrategy &self, int num_trainers) {
+#ifdef WIN32
+            PADDLE_THROW("Windows has NO support to distribute mode.");
+#endif
             self.num_trainers_ = num_trainers;
           })
       .def_property(
@@ -1486,7 +1489,15 @@ All parameter, weight, gradient are variables in Paddle.
       .def_property(
           "is_distribution",
           [](const BuildStrategy &self) { return self.is_distribution_; },
-          [](BuildStrategy &self, bool b) { self.is_distribution_ = b; })
+          [](BuildStrategy &self, bool b) {
+#ifdef WIN32
+            if (b) {
+              PADDLE_THROW("Windows has NO support to distribute mode.");
+            }
+#else
+            self.is_distribution_ = b;
+#endif
+          })
       .def_property("async_mode",
                     [](const BuildStrategy &self) { return self.async_mode_; },
                     [](BuildStrategy &self, bool b) { self.async_mode_ = b; })
...
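The two pybind guards above are directly user-visible. A hedged sketch of the Python-side effect, assuming the fluid BuildStrategy binding shown in the diff:

    import paddle.fluid as fluid

    bs = fluid.BuildStrategy()
    try:
        # On a Windows build, any assignment to num_trainers now throws
        # "Windows has NO support to distribute mode." (the guard is unconditional).
        bs.num_trainers = 2
    except Exception as e:
        print(e)
    try:
        # is_distribution only throws when set to True; assigning False is a
        # silent no-op on Windows because the store sits in the #else branch.
        bs.is_distribution = True
    except Exception as e:
        print(e)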