From 6449a2320812e0c269a0258bb162ec862a73579e Mon Sep 17 00:00:00 2001
From: lilong12
Date: Mon, 18 Apr 2022 21:48:28 +0800
Subject: [PATCH] use group id to differentiate keys for tcp store (#41496) (#41510)

---
 paddle/fluid/distributed/collective/ProcessGroupNCCL.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
index 3a2d3f82ee..30813b904d 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
@@ -110,7 +110,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID(
     std::vector& nccl_ids) {  // NOLINT
   if (rank_ == 0) {
     for (size_t i = 0; i < nccl_ids.size(); i++) {
-      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i);
+      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" +
+                 std::to_string(i);
       auto nccl_id = std::vector(
           reinterpret_cast(&nccl_ids[i]),
           reinterpret_cast(&nccl_ids[i]) + NCCL_UNIQUE_ID_BYTES);
@@ -118,7 +119,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID(
     }
   } else {
     for (size_t i = 0; i < nccl_ids.size(); i++) {
-      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i);
+      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" +
+                 std::to_string(i);
       auto ret = store_->get(key);
       std::memcpy(&nccl_ids[i], ret.data(), ret.size());
     }
-- 
GitLab