diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index eeb5e3b397c10e268607113e10ad857b6de9e444..b1d892e2521a39dbf37db171831e41454f5f4c52 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -110,7 +110,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID( std::vector& nccl_ids) { // NOLINT if (rank_ == 0) { for (size_t i = 0; i < nccl_ids.size(); i++) { - auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i); + auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" + + std::to_string(i); auto nccl_id = std::vector( reinterpret_cast(&nccl_ids[i]), reinterpret_cast(&nccl_ids[i]) + NCCL_UNIQUE_ID_BYTES); @@ -118,7 +119,8 @@ void ProcessGroupNCCL::BroadcastUniqueNCCLID( } } else { for (size_t i = 0; i < nccl_ids.size(); i++) { - auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i); + auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(gid_) + "/" + + std::to_string(i); auto ret = store_->get(key); std::memcpy(&nccl_ids[i], ret.data(), ret.size()); }