From a76fa414983a0eda7b2b12ddcdd0d89fd77e492c Mon Sep 17 00:00:00 2001 From: ziyoujiyi <73728031+ziyoujiyi@users.noreply.github.com> Date: Sat, 17 Sep 2022 10:39:29 +0800 Subject: [PATCH] V2.4 - cherry-pick (#46126) * back fl * delete ssl cert * . * make warning * . * unittest paral degree * solve unittest * heter & multi cloud commm ready * . * . * fix gloo compile warning * adapt for nn fl-ps --- cmake/external/gloo.cmake | 4 ++-- python/paddle/distributed/ps/the_one_ps.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) mode change 100644 => 100755 cmake/external/gloo.cmake diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake old mode 100644 new mode 100755 index cd7b254892e..a0fc013a130 --- a/cmake/external/gloo.cmake +++ b/cmake/external/gloo.cmake @@ -25,8 +25,8 @@ set(GLOO_LIBRARY_DIR "${GLOO_INSTALL_DIR}/lib" CACHE PATH "gloo library directory." FORCE) # As we add extra features for gloo, we use the non-official repo -set(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git) -set(GLOO_TAG v0.0.2) +set(GLOO_REPOSITORY ${GIT_URL}/ziyoujiyi/gloo.git) +set(GLOO_TAG v0.0.3) set(GLOO_LIBRARIES "${GLOO_INSTALL_DIR}/lib/libgloo.a" CACHE FILEPATH "gloo library." FORCE) diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py index 77a0ab0a659..5765a5e24b2 100755 --- a/python/paddle/distributed/ps/the_one_ps.py +++ b/python/paddle/distributed/ps/the_one_ps.py @@ -1090,8 +1090,9 @@ class TheOnePSRuntime(RuntimeBase): print("communicator config:", trainer_config.get_communicator_flags()) self._worker.init_worker(worker_desc, self.string_hosts, self.role_id) - self.trainer_endpoint = get_trainer_endpoint(self.role_maker) - print("fl-ps > trainer_endpoint: {}".format(self.trainer_endpoint)) + if not self.is_heter_ps_mode: + self.trainer_endpoint = get_trainer_endpoint(self.role_maker) + print("fl-ps > trainer_endpoint: {}".format(self.trainer_endpoint)) print("fl-ps > with_coordinator? {}".format(self.with_coordinator)) print("fl-ps > coordinator addr: {}".format(self.coordinator_hosts)) if self.with_coordinator: -- GitLab