From 75227c9e35308dac71d710e8360eaa9854f97915 Mon Sep 17 00:00:00 2001
From: lilong12
Date: Thu, 7 Apr 2022 16:38:16 +0800
Subject: [PATCH] use group id to differentiate keys for tcp store (#41496)

---
 paddle/fluid/distributed/collective/ProcessGroupNCCL.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
index eeb5e3b397c..b1d892e2521 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
@@ -110,7 +110,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID(
     std::vector& nccl_ids) { // NOLINT
   if (rank_ == 0) {
     for (size_t i = 0; i < nccl_ids.size(); i++) {
-      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i);
+      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" +
+                 std::to_string(i);
       auto nccl_id = std::vector(
           reinterpret_cast(&nccl_ids[i]),
           reinterpret_cast(&nccl_ids[i]) + NCCL_UNIQUE_ID_BYTES);
@@ -118,7 +119,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID(
     }
   } else {
     for (size_t i = 0; i < nccl_ids.size(); i++) {
-      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i);
+      auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" +
+                 std::to_string(i);
       auto ret = store_->get(key);
       std::memcpy(&nccl_ids[i], ret.data(), ret.size());
     }
-- 
GitLab