未验证 提交 f70f5e4f 编写于 作者: L lilong12 提交者: GitHub

fix the bug for processgroup_hccl compiling (#40437)

上级 0d78e491
......@@ -139,12 +139,10 @@ bool ProcessGroupHCCL::HCCLTask::IsCompleted() {
// TODO(sandyhouse): Add timeout for wait, now timeout unused
bool ProcessGroupHCCL::HCCLTask::Wait(std::chrono::milliseconds timeout) {
SynchronizeStreams();
if (FLAGS_hccl_blocking_wait) {
// NOTE(sandyhouse): It will block host for sync
while (!IsCompleted()) {
std::this_thread::sleep_for(std::chrono::milliseconds(kWaitBlockTImeout));
}
}
return true;
}
......
......@@ -84,29 +84,6 @@ class ProcessGroupHCCL : public ProcessGroup {
std::vector<Tensor>& tensors,
const BroadcastOptions& = BroadcastOptions()) override;
std::shared_ptr<ProcessGroup::Task> Barrier(
const BarrierOptions& = BarrierOptions()) override;
std::shared_ptr<ProcessGroup::Task> Send(std::vector<Tensor>& tensors,
int dst_rank) override;
std::shared_ptr<ProcessGroup::Task> Recv(std::vector<Tensor>& tensors,
int src_rank) override;
std::shared_ptr<ProcessGroup::Task> AllGather(
std::vector<Tensor>& in_tensors,
std::vector<Tensor>& out_tensors) override;
std::shared_ptr<ProcessGroup::Task> AllToAll(
std::vector<Tensor>& in, std::vector<Tensor>& out) override;
std::shared_ptr<ProcessGroup::Task> Reduce(
std::vector<Tensor>& tensors, const ReduceOptions& opts) override;
std::shared_ptr<ProcessGroup::Task> Scatter(std::vector<Tensor>& in_tensors,
std::vector<Tensor>& out_tensors,
const ScatterOptions&) override;
protected:
virtual std::shared_ptr<ProcessGroupHCCL::HCCLTask> CreateTask(
std::vector<Place> places, int rank, CommType opType,
......
......@@ -92,7 +92,7 @@ if(NOT ON_INFER)
if (WITH_GLOO)
set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo)
endif()
if(WITH_ASCEND)
if(WITH_ASCEND_CL)
set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl)
endif()
set(PYBIND_SRCS ${PYBIND_SRCS} distributed_py.cc)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册