From 031debb75bec5b8ddf1faa878985c6c22578cfcd Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Mon, 8 Aug 2022 20:02:02 +0800 Subject: [PATCH] fix memory leak (#44971) --- .../distributed/collective/ProcessGroupHCCL.cc | 11 ----------- .../distributed/collective/ProcessGroupNCCL.cc | 15 +++++++-------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc index 3b3b505ffb..718b33903a 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc @@ -197,17 +197,6 @@ std::shared_ptr ProcessGroupHCCL::Collective( SyncDefaultStream(places, places_to_events_[key], places_to_ctx_[key]); auto task = CreateTask(places, rank_, op_type, inputs); - task->SetOutputs(outputs); - - // if (FLAGS_use_stream_safe_npu_allocator) { - // for (size_t i = 0; i < inputs.size(); ++i) { - // platform::NPUDeviceGuard guard(places[i].GetDeviceId()); - // auto dense_tensor = - // std::dynamic_pointer_cast(inputs[i].impl()); - // memory::RecordStream(dense_tensor->Holder(), - // places_to_ctx_[key][i]->stream()); - // } - // } for (size_t i = 0; i < inputs.size(); ++i) { platform::NPUDeviceGuard guard(places[i].GetDeviceId()); diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index d776f62373..168548cf9b 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -244,25 +244,24 @@ std::shared_ptr ProcessGroupNCCL::Collective( SyncDefaultStream(places, places_to_events_[key], places_to_ctx_[key]); auto task = CreateTask(places, rank_, op_type, inputs); - task->SetOutputs(outputs); // construct uninitialize guard for device platform::CUDADeviceGuard cuda_guard; - if (FLAGS_use_stream_safe_cuda_allocator) { + { + platform::NCCLGroupGuard nccl_guard; for (size_t i = 0; i < inputs.size(); ++i) { cuda_guard.SetDevice(places[i]); - memory::RecordStream(inputs[i].Holder(), - places_to_ctx_[key][i]->stream()); + const auto& nccl_stream = places_to_ctx_[key][i]->stream(); + fn(inputs[i], outputs[i], nccl_comms[i]->GetNcclComm(), nccl_stream); } } - { - platform::NCCLGroupGuard nccl_guard; + if (FLAGS_use_stream_safe_cuda_allocator) { for (size_t i = 0; i < inputs.size(); ++i) { cuda_guard.SetDevice(places[i]); - const auto& nccl_stream = places_to_ctx_[key][i]->stream(); - fn(inputs[i], outputs[i], nccl_comms[i]->GetNcclComm(), nccl_stream); + memory::RecordStream(inputs[i].Holder(), + places_to_ctx_[key][i]->stream()); } } -- GitLab