Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c92b3805
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c92b3805
编写于
6月 13, 2022
作者:
S
ShenLiang
提交者:
GitHub
6月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support DCU in ProcessGroup (#43356)
上级
4f006636
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
69 addition
and
6 deletion
+69
-6
paddle/fluid/distributed/collective/CMakeLists.txt
paddle/fluid/distributed/collective/CMakeLists.txt
+1
-1
paddle/fluid/distributed/collective/NCCLTools.h
paddle/fluid/distributed/collective/NCCLTools.h
+58
-4
paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
+4
-0
paddle/fluid/distributed/collective/ProcessGroupNCCL.h
paddle/fluid/distributed/collective/ProcessGroupNCCL.h
+6
-1
未找到文件。
paddle/fluid/distributed/collective/CMakeLists.txt
浏览文件 @
c92b3805
...
...
@@ -14,7 +14,7 @@ if(WITH_DISTRIBUTE)
DEPS phi_api eager_api gloo_wrapper
)
endif
()
if
(
WITH_NCCL
)
if
(
WITH_NCCL
OR WITH_RCCL
)
cc_library
(
processgroup_nccl
SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc
...
...
paddle/fluid/distributed/collective/NCCLTools.h
浏览文件 @
c92b3805
...
...
@@ -14,7 +14,13 @@
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#include <error.h>
#include <string>
...
...
@@ -23,9 +29,19 @@
#include "paddle/fluid/distributed/collective/Types.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/variable.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#else
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
...
...
@@ -56,7 +72,11 @@ class EventManager {
~
EventManager
()
{
if
(
is_created_
)
{
platform
::
CUDADeviceGuard
guard
(
device_index_
);
#ifdef PADDLE_WITH_HIP
hipEventDestroy
(
event_
);
#else
cudaEventDestroy
(
event_
);
#endif
}
}
...
...
@@ -94,24 +114,42 @@ class EventManager {
device_index
,
device_index_
));
platform
::
CUDADeviceGuard
guard
(
device_index_
);
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaEventRecord
(
event_
,
ctx
.
stream
()));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
hipEventRecord
(
event_
,
ctx
.
stream
()));
#endif
}
bool
Query
()
const
{
#ifdef PADDLE_WITH_HIP
gpuError_t
err
=
hipEventQuery
(
event_
);
if
(
err
==
hipSuccess
)
{
return
true
;
}
if
(
err
==
hipErrorNotReady
)
{
return
false
;
}
#else
gpuError_t
err
=
cudaEventQuery
(
event_
);
if
(
err
==
cudaSuccess
)
{
return
true
;
}
else
if
(
err
==
cudaErrorNotReady
)
{
return
false
;
}
else
{
PADDLE_ENFORCE_GPU_SUCCESS
(
err
);
}
if
(
err
==
cudaErrorNotReady
)
{
return
false
;
}
#endif
PADDLE_ENFORCE_GPU_SUCCESS
(
err
);
return
false
;
}
void
Synchronize
()
const
{
if
(
is_created_
)
{
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
hipEventSynchronize
(
event_
));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaEventSynchronize
(
event_
));
#endif
}
}
...
...
@@ -124,12 +162,22 @@ class EventManager {
"Event's device %d"
,
device_index
,
device_index_
));
platform
::
CUDADeviceGuard
guard
(
device_index_
);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
hipStreamWaitEvent
(
ctx
.
stream
(),
event_
,
0
));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaStreamWaitEvent
(
ctx
.
stream
(),
event_
,
0
));
#endif
}
}
private:
#ifdef PADDLE_WITH_HIP
unsigned
int
flags_
=
hipEventDefault
;
#else
unsigned
int
flags_
=
cudaEventDefault
;
#endif
bool
is_created_
{
false
};
gpuEvent_t
event_
{};
int8_t
device_index_
{
0
};
...
...
@@ -138,7 +186,13 @@ class EventManager {
void
CreateEvent
(
int
device_index
)
{
device_index_
=
device_index
;
platform
::
CUDADeviceGuard
guard
(
device_index
);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
hipEventCreateWithFlags
(
&
event_
,
flags_
));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaEventCreateWithFlags
(
&
event_
,
flags_
));
#endif
is_created_
=
true
;
}
};
...
...
paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
浏览文件 @
c92b3805
...
...
@@ -95,7 +95,11 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) {
// If we use the work to do barrier, we should block cpu
for
(
auto
&
place
:
places_
)
{
platform
::
CUDADeviceGuard
gpuGuard
(
place
);
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaDeviceSynchronize
());
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
hipDeviceSynchronize
());
#endif
}
}
return
true
;
...
...
paddle/fluid/distributed/collective/ProcessGroupNCCL.h
浏览文件 @
c92b3805
...
...
@@ -30,8 +30,13 @@
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h"
#if defined(PADDLE_WITH_NCCL)
#if defined(PADDLE_WITH_NCCL)
|| defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/distributed/collective/NCCLTools.h"
#endif
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#else
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录