From f70f5e4fdafdf31276d9adee02a3d41e0600b778 Mon Sep 17 00:00:00 2001
From: lilong12
Date: Fri, 11 Mar 2022 18:58:28 +0800
Subject: [PATCH] fix the bug for processgroup_hccl compiling (#40437)

---
 .../collective/ProcessGroupHCCL.cc            |  8 +++----
 .../distributed/collective/ProcessGroupHCCL.h | 23 -------------------
 paddle/fluid/pybind/CMakeLists.txt            |  2 +-
 3 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
index 84f5ca48d2..2deeb7ca03 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
+++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
@@ -139,11 +139,9 @@ bool ProcessGroupHCCL::HCCLTask::IsCompleted() {
 // TODO(sandyhouse): Add timeout for wait, now timeout unused
 bool ProcessGroupHCCL::HCCLTask::Wait(std::chrono::milliseconds timeout) {
   SynchronizeStreams();
-  if (FLAGS_hccl_blocking_wait) {
-    // NOTE(sandyhouse): It will block host for sync
-    while (!IsCompleted()) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(kWaitBlockTImeout));
-    }
+  // NOTE(sandyhouse): It will block host for sync
+  while (!IsCompleted()) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(kWaitBlockTImeout));
   }
   return true;
 }
diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
index f2376b4eed..83d509be2c 100644
--- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
+++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h
@@ -84,29 +84,6 @@ class ProcessGroupHCCL : public ProcessGroup {
       std::vector& tensors,
       const BroadcastOptions& = BroadcastOptions()) override;
 
-  std::shared_ptr Barrier(
-      const BarrierOptions& = BarrierOptions()) override;
-
-  std::shared_ptr Send(std::vector& tensors,
-                       int dst_rank) override;
-
-  std::shared_ptr Recv(std::vector& tensors,
-                       int src_rank) override;
-
-  std::shared_ptr AllGather(
-      std::vector& in_tensors,
-      std::vector& out_tensors) override;
-
-  std::shared_ptr AllToAll(
-      std::vector& in, std::vector& out) override;
-
-  std::shared_ptr Reduce(
-      std::vector& tensors, const ReduceOptions& opts) override;
-
-  std::shared_ptr Scatter(std::vector& in_tensors,
-                          std::vector& out_tensors,
-                          const ScatterOptions&) override;
-
  protected:
   virtual std::shared_ptr CreateTask(
       std::vector places, int rank, CommType opType,
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 6c8fc450cd..8ee22590b6 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -92,7 +92,7 @@ if(NOT ON_INFER)
     if (WITH_GLOO)
       set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo)
     endif()
-    if(WITH_ASCEND)
+    if(WITH_ASCEND_CL)
       set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl)
     endif()
     set(PYBIND_SRCS ${PYBIND_SRCS} distributed_py.cc)
-- 
GitLab