Commit 40f3f4f0 (unverified)
Authored by Wen Sun on Dec 16, 2022; committed via GitHub on Dec 16, 2022.
refactor: rename files (#49117)
Parent: e77d1cac
Showing 18 changed files with 112 additions and 49 deletions.
paddle/fluid/distributed/collective/CMakeLists.txt (+6, -6)
paddle/fluid/distributed/collective/ProcessGroup.h (+30, -0)
paddle/fluid/distributed/collective/ProcessGroupBKCL.h (+1, -1)
paddle/fluid/distributed/collective/check.cc (+1, -1)
paddle/fluid/distributed/collective/nccl_tools.cc (+1, -1)
paddle/fluid/distributed/collective/nccl_tools.h (+0, -0)
paddle/fluid/distributed/collective/process_group_nccl.cc (+2, -2)
paddle/fluid/distributed/collective/process_group_nccl.h (+2, -2)
paddle/fluid/distributed/collective/process_group_stream.cc (+40, -1)
paddle/fluid/distributed/collective/process_group_stream.h (+16, -0)
paddle/fluid/operators/fused/fused_attention_op.cu (+1, -1)
paddle/fluid/operators/fused/fused_feedforward_op.cu (+1, -1)
paddle/fluid/pybind/CMakeLists.txt (+1, -1)
paddle/fluid/pybind/distributed_py.cc (+6, -28)
paddle/phi/backends/CMakeLists.txt (+1, -1)
paddle/phi/backends/processgroup_comm_utils.cc (+1, -1)
paddle/phi/kernels/CMakeLists.txt (+1, -1)
paddle/phi/kernels/gpu/sync_batch_norm_utils.h (+1, -1)

paddle/fluid/distributed/collective/CMakeLists.txt
@@ -3,13 +3,13 @@ cc_library(
   SRCS ProcessGroup.cc
   DEPS dense_tensor)
 cc_library(
-  processgroup_stream
-  SRCS ProcessGroupStream.cc
+  process_group_stream
+  SRCS process_group_stream.cc
   DEPS dense_tensor)
 cc_library(
   eager_reducer
   SRCS reducer.cc
-  DEPS eager_api processgroup processgroup_stream phi_api string_helper)
+  DEPS eager_api processgroup process_group_stream phi_api string_helper)
 
 if(WITH_DISTRIBUTE)
   cc_library(
@@ -20,10 +20,10 @@ endif()
 
 if(WITH_NCCL OR WITH_RCCL)
   cc_library(
-    processgroup_nccl
-    SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc check.cc
+    process_group_nccl
+    SRCS process_group_nccl.cc nccl_tools.cc Common.cc check.cc
     DEPS processgroup
-         processgroup_stream
+         process_group_stream
          place
          enforce
          collective_helper

paddle/fluid/distributed/collective/ProcessGroup.h
@@ -97,6 +97,17 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+      phi::DenseTensor* out_tensor,
+      const phi::DenseTensor& in_tensor,
+      bool sync_op) {
+    return AllGather(out_tensor,
+                     in_tensor,
+                     /*offset*/ 0,
+                     /*numel*/ -1,  // -1 indicates the whole tensor
+                     sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
       phi::DenseTensor* out_tensor,
       const phi::DenseTensor& in_tensor,
@@ -175,6 +186,16 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+                                                   int src_rank,
+                                                   bool sync_op) {
+    return Recv(tensor,
+                src_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
                                                    int src_rank,
                                                    int64_t offset,
@@ -185,6 +206,15 @@ class ProcessGroup {
         GetBackendName()));
   }
 
+  virtual std::shared_ptr<ProcessGroup::Task> Send(
+      const phi::DenseTensor& tensor, int dst_rank, bool sync_op) {
+    return Send(tensor,
+                dst_rank,
+                /*offset*/ 0,
+                /*numel*/ -1,  // -1 indicates the whole tensor
+                sync_op);
+  }
+
   virtual std::shared_ptr<ProcessGroup::Task> Send(
       const phi::DenseTensor& tensor,
       int dst_rank,
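
The three overloads added above share one shape: they fill in /*offset*/ 0 and /*numel*/ -1 (the whole tensor) and forward to the existing fully-parameterized virtual. A minimal caller-side sketch of what this enables (illustrative only, not part of the commit; it assumes an already-initialized ProcessGroup, correctly sized tensors, and that Task::Wait() blocks until completion):

// Hypothetical helper, not Paddle code: a whole-tensor all-gather no longer
// needs to spell out offset/numel at the call site.
#include <memory>

#include "paddle/fluid/distributed/collective/ProcessGroup.h"
#include "paddle/phi/core/dense_tensor.h"

using paddle::distributed::ProcessGroup;

std::shared_ptr<ProcessGroup::Task> GatherWhole(ProcessGroup* pg,
                                                phi::DenseTensor* out,
                                                const phi::DenseTensor& in) {
  // Equivalent to pg->AllGather(out, in, /*offset*/ 0, /*numel*/ -1, true).
  auto task = pg->AllGather(out, in, /*sync_op*/ true);
  task->Wait();  // block until the collective completes
  return task;
}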

paddle/fluid/distributed/collective/ProcessGroupBKCL.h
@@ -20,7 +20,7 @@
 #include <string>
 #include <unordered_map>
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/store/store.h"
 #include "paddle/fluid/platform/device/xpu/xpu_header.h"
 #include "paddle/fluid/platform/enforce.h"

paddle/fluid/distributed/collective/check.cc
@@ -14,7 +14,7 @@
 
 #include "paddle/fluid/distributed/collective/check.h"
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/dense_tensor.h"

paddle/fluid/distributed/collective/NCCLTools.cc → paddle/fluid/distributed/collective/nccl_tools.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 
 #include "paddle/fluid/platform/enforce.h"

paddle/fluid/distributed/collective/NCCLTools.h → paddle/fluid/distributed/collective/nccl_tools.h
File moved.

paddle/fluid/distributed/collective/ProcessGroupNCCL.cc → paddle/fluid/distributed/collective/process_group_nccl.cc
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 
 #include "paddle/fluid/distributed/collective/Common.h"
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
 #include "paddle/fluid/distributed/collective/check.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #include "paddle/fluid/distributed/collective/utils.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #include "paddle/fluid/platform/place.h"

paddle/fluid/distributed/collective/ProcessGroupNCCL.h → paddle/fluid/distributed/collective/process_group_nccl.h
@@ -20,7 +20,7 @@
 #include <unordered_map>
 #include <vector>
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/store/store.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/device_event.h"
@@ -29,7 +29,7 @@
 #include "paddle/phi/core/device_context.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/NCCLTools.h"
+#include "paddle/fluid/distributed/collective/nccl_tools.h"
 #endif
 
 #ifdef PADDLE_WITH_RCCL

paddle/fluid/distributed/collective/ProcessGroupStream.cc → paddle/fluid/distributed/collective/process_group_stream.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 
 namespace paddle {
 namespace distributed {
@@ -40,6 +40,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
       /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
+    phi::DenseTensor* out_tensor,
+    const phi::DenseTensor& in_tensor,
+    bool sync_op,
+    bool use_calc_stream) {
+  return AllGather(out_tensor,
+                   in_tensor,
+                   /*offset*/ 0,
+                   /*numel*/ -1,  // -1 indicates the whole tensor
+                   sync_op,
+                   use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::AllGather(
     phi::DenseTensor* out_tensor,
     const phi::DenseTensor& in_tensor,
@@ -200,6 +213,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
       /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
+    phi::DenseTensor* tensor,
+    int src_rank,
+    bool sync_op,
+    bool use_calc_stream) {
+  return Recv(tensor,
+              src_rank,
+              /*offset*/ 0,
+              /*numel*/ -1,  // -1 indicates sending the whole tensor
+              sync_op,
+              use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Recv(
     phi::DenseTensor* tensor,
     int src_rank,
@@ -225,6 +251,19 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
       /*use_calc_stream*/ false);
 }
 
+std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
+    const phi::DenseTensor& tensor,
+    int dst_rank,
+    bool sync_op,
+    bool use_calc_stream) {
+  return Send(tensor,
+              dst_rank,
+              /*offset*/ 0,
+              /*numel*/ -1,  // -1 indicates receiving the whole tensor
+              sync_op,
+              use_calc_stream);
+}
+
 std::shared_ptr<ProcessGroup::Task> ProcessGroupStream::Send(
     const phi::DenseTensor& tensor,
     int dst_rank,
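
The ProcessGroupStream overloads above use the same forwarding trick rather than default arguments. The distinction matters in C++: default arguments on virtual functions are bound statically, by the caller's declared type, while a thin forwarding overload keeps the "whole tensor" defaults in exactly one place. A standalone sketch of the pattern (illustrative; Channel and its members are invented, not Paddle code):

#include <cstdint>
#include <iostream>

class Channel {
 public:
  virtual ~Channel() = default;

  // Convenience overload: fixes the "whole buffer" defaults in one place and
  // forwards to the single virtual that subclasses override.
  int Send(int dst_rank, bool sync_op) {
    return Send(dst_rank, /*offset*/ 0, /*numel*/ -1, sync_op);
  }

  virtual int Send(int dst_rank, int64_t offset, int64_t numel, bool sync_op) {
    std::cout << "send to " << dst_rank << " offset=" << offset
              << " numel=" << numel << " sync=" << sync_op << "\n";
    return 0;
  }
};

int main() {
  Channel ch;
  ch.Send(/*dst_rank*/ 1, /*sync_op*/ true);  // forwards (offset=0, numel=-1)
  return 0;
}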

paddle/fluid/distributed/collective/ProcessGroupStream.h → paddle/fluid/distributed/collective/process_group_stream.h
@@ -69,6 +69,12 @@ class ProcessGroupStream : public ProcessGroup {
       int64_t numel,
       bool sync_op) override;
 
+  virtual std::shared_ptr<ProcessGroup::Task> AllGather(
+      phi::DenseTensor* out_tensor,
+      const phi::DenseTensor& in_tensor,
+      bool sync_op,
+      bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
       phi::DenseTensor* out_tensor,
       const phi::DenseTensor& in_tensor,
@@ -161,6 +167,11 @@ class ProcessGroupStream : public ProcessGroup {
       int64_t numel,
       bool sync_op) override;
 
+  virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
+                                                   int src_rank,
+                                                   bool sync_op,
+                                                   bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> Recv(phi::DenseTensor* tensor,
                                                    int src_rank,
                                                    int64_t offset,
@@ -174,6 +185,11 @@ class ProcessGroupStream : public ProcessGroup {
       int64_t numel,
       bool sync_op) override;
 
+  std::shared_ptr<ProcessGroup::Task> Send(const phi::DenseTensor& tensor,
+                                           int dst_rank,
+                                           bool sync_op,
+                                           bool use_calc_stream);
+
   virtual std::shared_ptr<ProcessGroup::Task> Send(
       const phi::DenseTensor& tensor,
       int dst_rank,

paddle/fluid/operators/fused/fused_attention_op.cu
@@ -30,7 +30,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #endif

paddle/fluid/operators/fused/fused_feedforward_op.cu
@@ -23,7 +23,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #endif

paddle/fluid/pybind/CMakeLists.txt
@@ -157,7 +157,7 @@ endif()
 if(WITH_PYTHON)
   set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
   if(WITH_NCCL OR WITH_RCCL)
-    set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
+    set(PYBIND_DEPS ${PYBIND_DEPS} process_group_nccl)
   endif()
   if(WITH_XPU_BKCL)
     set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_bkcl)

paddle/fluid/pybind/distributed_py.cc
@@ -22,8 +22,8 @@ limitations under the License. */
 #endif
 
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
-#include "paddle/fluid/distributed/collective/ProcessGroupStream.h"
 #include "paddle/fluid/distributed/collective/Types.h"
+#include "paddle/fluid/distributed/collective/process_group_stream.h"
 #include "paddle/fluid/distributed/collective/reducer.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor.h"
@@ -34,7 +34,7 @@ limitations under the License. */
 #include "paddle/phi/api/all.h"
 
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 
 #if defined(PADDLE_WITH_MPI)
@@ -169,9 +169,7 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto out_dense = *p_dense;
-              // numel == -1 indicates sending the whole tensor
-              return self.Send(
-                  out_dense, dst, /*offset*/ 0, /*numel*/ -1, sync_op);
+              return self.Send(out_dense, dst, sync_op);
             },
             py::arg("tensor"),
             py::arg("dst"),
@@ -215,9 +213,7 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto *in_dense = p_dense.get();
-              // numel == -1 indicates receiving the whole tensor
-              return self.Recv(
-                  in_dense, src, /*offset*/ 0, /*numel*/ -1, sync_op);
+              return self.Recv(in_dense, src, sync_op);
             },
             py::arg("tensor"),
             py::arg("src"),
@@ -270,11 +266,7 @@ void BindDistributed(py::module *m) {
               auto in_dense = *p_in_tensor;
 
               auto *dev_ctx = self.GetDeviceContext(in_tensor.place());
-              auto task = self.AllGather(out_dense,
-                                         in_dense,
-                                         /*offset*/ 0,
-                                         /*numel*/ -1,
-                                         sync_op);
+              auto task = self.AllGather(out_dense, in_dense, sync_op);
               SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
               task->UpdateWaitChain(*dev_ctx);
               return task;
@@ -300,11 +292,7 @@ void BindDistributed(py::module *m) {
                   in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
-              return self.AllGather(out_dense,
-                                    in_dense,
-                                    /*offset*/ 0,
-                                    /*numel*/ -1,
-                                    sync_op);
+              return self.AllGather(out_dense, in_dense, sync_op);
             },
             py::arg("out"),
             py::arg("in"),
@@ -771,8 +759,6 @@ void BindDistributed(py::module *m) {
               auto *dev_ctx = self.GetDeviceContext(in_tensor.place(), true);
               auto task = self.AllGather(out_dense,
                                          in_dense,
-                                         /*offset*/ 0,
-                                         /*numel*/ -1,
                                          /*sync_op*/ true,
                                          /*use_calc_stream*/ true);
               SplitTensor(*dev_ctx, *out_dense, &out_tensor_list);
@@ -799,8 +785,6 @@ void BindDistributed(py::module *m) {
               return self.AllGather(out_dense,
                                     in_dense,
-                                    /*offset*/ 0,
-                                    /*numel*/ -1,
                                     /*sync_op*/ true,
                                     /*use_calc_stream*/ true);
             },
@@ -1127,11 +1111,8 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto out_dense = *p_dense;
-              // numel == -1 indicates sending the whole tensor
               return self.Send(out_dense,
                                dst,
-                               /*offset*/ 0,
-                               /*numel*/ -1,
                                /*sync_op*/ true,
                                /*use_calc_stream*/ true);
             },
@@ -1177,11 +1158,8 @@ void BindDistributed(py::module *m) {
               auto p_dense =
                   std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
               auto *in_dense = p_dense.get();
-              // numel == -1 indicates receiving the whole tensor
               return self.Recv(in_dense,
                                src,
-                               /*offset*/ 0,
-                               /*numel*/ -1,
                                /*sync_op*/ true,
                                /*use_calc_stream*/ true);
             },
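
The distributed_py.cc hunks are pure call-site cleanups: each binding lambda now calls the new three- or four-argument overload instead of passing /*offset*/ 0, /*numel*/ -1 itself. A minimal pybind11 sketch of the same binding shape (illustrative; the module, class, and method names are invented, not Paddle's):

#include <cstdint>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Toy stand-in for a process group.
struct Group {
  // Fully-parameterized form.
  int Send(int dst, int64_t offset, int64_t numel, bool sync_op) {
    return sync_op ? dst : -dst;
  }
  // Convenience overload, in the same spirit as this commit.
  int Send(int dst, bool sync_op) {
    return Send(dst, /*offset*/ 0, /*numel*/ -1, sync_op);
  }
};

PYBIND11_MODULE(demo, m) {
  py::class_<Group>(m, "Group")
      .def(py::init<>())
      .def(
          "send",
          [](Group &self, int dst, bool sync_op) {
            // Delegate to the convenience overload rather than spelling out
            // offset/numel in the binding.
            return self.Send(dst, sync_op);
          },
          py::arg("dst"),
          py::arg("sync_op"));
}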

paddle/phi/backends/CMakeLists.txt
@@ -67,7 +67,7 @@ endif()
 
 set(COMM_UTILS_DEPS processgroup)
 if(WITH_NCCL OR WITH_RCCL)
-  set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_nccl)
+  set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} process_group_nccl)
 endif()
 if(WITH_CUSTOM_DEVICE)
   set(COMM_UTILS_DEPS ${PROCESS_GROUP_UTILS_DEPS} processgroup_custom)

paddle/phi/backends/processgroup_comm_utils.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #include "paddle/phi/backends/c_comm_lib.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #if defined(PADDLE_WITH_CUSTOM_DEVICE)
 #include "paddle/fluid/distributed/collective/ProcessGroupCustom.h"

paddle/phi/kernels/CMakeLists.txt
@@ -80,7 +80,7 @@ set(COMMON_KERNEL_DEPS
 
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup)
 if(WITH_NCCL OR WITH_RCCL)
-  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_nccl)
+  set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group_nccl)
 endif()
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} processgroup_comm_utils)
 if(WITH_CUDNN_FRONTEND)

paddle/phi/kernels/gpu/sync_batch_norm_utils.h
@@ -28,7 +28,7 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/distributed/collective/ProcessGroup.h"
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
+#include "paddle/fluid/distributed/collective/process_group_nccl.h"
 #endif
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"