fix pe with cpu place

1effba33 · Yancey1989 · e568acbe · 1effba33 · 1effba33 · 1effba33
3 changed file
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -95,7 +95,7 @@ ParallelExecutor::ParallelExecutor(
  }
  if (member_->local_scopes_.size() != 1 && local_scopes.empty()) {
-    BCastParamsToGPUs(bcast_vars);
+    BCastParamsToDevs(bcast_vars);
  }
  // Startup Program has been run. All local scopes has correct parameters.
@@ -131,7 +131,7 @@ ParallelExecutor::ParallelExecutor(
      member_->places_, std::move(member_->executor_)));
 }
-void ParallelExecutor::BCastParamsToGPUs(
+void ParallelExecutor::BCastParamsToDevs(
    const std::unordered_set<std::string> &vars) const {
  // the the initializing bcast, all vars would be bcast from device(0),
  // otherwise
@@ -202,7 +202,11 @@ void ParallelExecutor::BCastParamsToGPUs(
 #endif
    } else {
      platform::CPUPlace cpu;
-      for (size_t i = 1; i < member_->places_.size(); ++i) {
+      for (size_t i = 0; i < member_->places_.size(); ++i) {
+        if ((initializing && i == 0) ||
+            (!initializing && static_cast<int>(i) == var_dev_id))
+          continue;
        auto local_scope = member_->local_scopes_[i];
        auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
        t->Resize(dims);

--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -66,7 +66,7 @@ class ParallelExecutor {
  void Run(const std::vector<std::string> &fetch_tensors,
           const std::string &fetched_var_name);
-  void BCastParamsToGPUs(const std::unordered_set<std::string> &vars) const;
+  void BCastParamsToDevs(const std::unordered_set<std::string> &vars) const;
 private:
  ParallelExecutorPrivate *member_;

--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -656,7 +656,7 @@ All parameter, weight, gradient are variables in Paddle.
                  const std::string &, Scope *, std::vector<Scope *> &,
                  const ExecutionStrategy &, const BuildStrategy &, size_t,
                  size_t>())
-      .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
+      .def("bcast_params", &ParallelExecutor::BCastParamsToDevs)
      // NOTE: even we return a vec<Scope*>* to Python use reference policy.
      // We still cannot get local_scope from this vector, since the element
      // of vec<Scope*> will be freed by Python GC. We can only return Scope*