未验证 提交 c92b3805 编写于 作者: S ShenLiang 提交者: GitHub

Support DCU in ProcessGroup (#43356)

上级 4f006636
...@@ -14,7 +14,7 @@ if(WITH_DISTRIBUTE) ...@@ -14,7 +14,7 @@ if(WITH_DISTRIBUTE)
DEPS phi_api eager_api gloo_wrapper) DEPS phi_api eager_api gloo_wrapper)
endif() endif()
if(WITH_NCCL) if(WITH_NCCL OR WITH_RCCL)
cc_library( cc_library(
processgroup_nccl processgroup_nccl
SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc
......
...@@ -14,7 +14,13 @@ ...@@ -14,7 +14,13 @@
#pragma once #pragma once
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h> #include <cuda_runtime.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#include <error.h> #include <error.h>
#include <string> #include <string>
...@@ -23,9 +29,19 @@ ...@@ -23,9 +29,19 @@
#include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/distributed/collective/Types.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#else
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
...@@ -56,7 +72,11 @@ class EventManager { ...@@ -56,7 +72,11 @@ class EventManager {
~EventManager() { ~EventManager() {
if (is_created_) { if (is_created_) {
platform::CUDADeviceGuard guard(device_index_); platform::CUDADeviceGuard guard(device_index_);
#ifdef PADDLE_WITH_HIP
hipEventDestroy(event_);
#else
cudaEventDestroy(event_); cudaEventDestroy(event_);
#endif
} }
} }
...@@ -94,24 +114,42 @@ class EventManager { ...@@ -94,24 +114,42 @@ class EventManager {
device_index, device_index_)); device_index, device_index_));
platform::CUDADeviceGuard guard(device_index_); platform::CUDADeviceGuard guard(device_index_);
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(event_, ctx.stream())); PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(event_, ctx.stream()));
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipEventRecord(event_, ctx.stream()));
#endif
} }
bool Query() const { bool Query() const {
#ifdef PADDLE_WITH_HIP
gpuError_t err = hipEventQuery(event_);
if (err == hipSuccess) {
return true;
}
if (err == hipErrorNotReady) {
return false;
}
#else
gpuError_t err = cudaEventQuery(event_); gpuError_t err = cudaEventQuery(event_);
if (err == cudaSuccess) { if (err == cudaSuccess) {
return true; return true;
} else if (err == cudaErrorNotReady) { }
return false; if (err == cudaErrorNotReady) {
} else {
PADDLE_ENFORCE_GPU_SUCCESS(err);
return false; return false;
} }
#endif
PADDLE_ENFORCE_GPU_SUCCESS(err);
return false;
} }
void Synchronize() const { void Synchronize() const {
if (is_created_) { if (is_created_) {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipEventSynchronize(event_));
#else
PADDLE_ENFORCE_GPU_SUCCESS(cudaEventSynchronize(event_)); PADDLE_ENFORCE_GPU_SUCCESS(cudaEventSynchronize(event_));
#endif
} }
} }
...@@ -124,12 +162,22 @@ class EventManager { ...@@ -124,12 +162,22 @@ class EventManager {
"Event's device %d", "Event's device %d",
device_index, device_index_)); device_index, device_index_));
platform::CUDADeviceGuard guard(device_index_); platform::CUDADeviceGuard guard(device_index_);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipStreamWaitEvent(ctx.stream(), event_, 0));
#else
PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(ctx.stream(), event_, 0)); PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(ctx.stream(), event_, 0));
#endif
} }
} }
private: private:
#ifdef PADDLE_WITH_HIP
unsigned int flags_ = hipEventDefault;
#else
unsigned int flags_ = cudaEventDefault; unsigned int flags_ = cudaEventDefault;
#endif
bool is_created_{false}; bool is_created_{false};
gpuEvent_t event_{}; gpuEvent_t event_{};
int8_t device_index_{0}; int8_t device_index_{0};
...@@ -138,7 +186,13 @@ class EventManager { ...@@ -138,7 +186,13 @@ class EventManager {
void CreateEvent(int device_index) { void CreateEvent(int device_index) {
device_index_ = device_index; device_index_ = device_index;
platform::CUDADeviceGuard guard(device_index); platform::CUDADeviceGuard guard(device_index);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipEventCreateWithFlags(&event_, flags_));
#else
PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreateWithFlags(&event_, flags_)); PADDLE_ENFORCE_GPU_SUCCESS(cudaEventCreateWithFlags(&event_, flags_));
#endif
is_created_ = true; is_created_ = true;
} }
}; };
......
...@@ -95,7 +95,11 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) { ...@@ -95,7 +95,11 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) {
// If we use the work to do barrier, we should block cpu // If we use the work to do barrier, we should block cpu
for (auto& place : places_) { for (auto& place : places_) {
platform::CUDADeviceGuard gpuGuard(place); platform::CUDADeviceGuard gpuGuard(place);
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
#else
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
} }
} }
return true; return true;
......
...@@ -30,8 +30,13 @@ ...@@ -30,8 +30,13 @@
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h" #include "paddle/fluid/platform/stream/cuda_stream.h"
#if defined(PADDLE_WITH_NCCL) #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/NCCLTools.h" #include "paddle/fluid/distributed/collective/NCCLTools.h"
#endif
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#else
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册