Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
307ad60d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
307ad60d
编写于
8月 22, 2022
作者:
R
ronnywang
提交者:
GitHub
8月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CustomDevice] fix custom ccl (#45276)
上级
bba13e21
变更
12
显示空白变更内容
内联
并排
Showing
12 changed files
with
143 additions
and
1 deletion
+143
-1
paddle/fluid/distributed/collective/Common.cc
paddle/fluid/distributed/collective/Common.cc
+9
-0
paddle/fluid/distributed/collective/Common.h
paddle/fluid/distributed/collective/Common.h
+3
-0
paddle/fluid/distributed/collective/ProcessGroupCustom.cc
paddle/fluid/distributed/collective/ProcessGroupCustom.cc
+111
-0
paddle/fluid/distributed/collective/ProcessGroupCustom.h
paddle/fluid/distributed/collective/ProcessGroupCustom.h
+10
-0
paddle/phi/backends/custom/custom_device.cc
paddle/phi/backends/custom/custom_device.cc
+2
-0
paddle/phi/backends/custom/custom_device_test.cc
paddle/phi/backends/custom/custom_device_test.cc
+1
-0
paddle/phi/backends/custom/fake_cpu_device.h
paddle/phi/backends/custom/fake_cpu_device.h
+1
-0
paddle/phi/backends/device_base.cc
paddle/phi/backends/device_base.cc
+1
-0
paddle/phi/backends/device_base.h
paddle/phi/backends/device_base.h
+1
-0
paddle/phi/backends/device_ext.h
paddle/phi/backends/device_ext.h
+1
-0
paddle/phi/backends/device_manager.cc
paddle/phi/backends/device_manager.cc
+2
-1
paddle/phi/backends/device_manager.h
paddle/phi/backends/device_manager.h
+1
-0
未找到文件。
paddle/fluid/distributed/collective/Common.cc
浏览文件 @
307ad60d
...
@@ -47,5 +47,14 @@ bool CheckTensorsInCudaPlace(const std::vector<phi::DenseTensor>& tensors) {
...
@@ -47,5 +47,14 @@ bool CheckTensorsInCudaPlace(const std::vector<phi::DenseTensor>& tensors) {
});
});
}
}
// Reports whether every tensor in `tensors` is placed on a custom device of
// type `dev_type`, as decided by platform::places_are_same_class against a
// CustomPlace built from `dev_type`. An empty vector yields true.
bool CheckTensorsInCustomPlace(const std::vector<phi::DenseTensor>& tensors,
                               const std::string& dev_type) {
  for (const phi::DenseTensor& tensor : tensors) {
    // Stop at the first tensor whose place does not match the custom place.
    if (!platform::places_are_same_class(
            tensor.place(), paddle::platform::CustomPlace(dev_type))) {
      return false;
    }
  }
  return true;
}
}
// namespace distributed
}
// namespace distributed
}
// namespace paddle
}
// namespace paddle
paddle/fluid/distributed/collective/Common.h
浏览文件 @
307ad60d
...
@@ -28,5 +28,8 @@ std::string GetKeyFromPlaces(const std::vector<Place>& places);
...
@@ -28,5 +28,8 @@ std::string GetKeyFromPlaces(const std::vector<Place>& places);
bool
CheckTensorsInCudaPlace
(
const
std
::
vector
<
phi
::
DenseTensor
>&
tensors
);
bool
CheckTensorsInCudaPlace
(
const
std
::
vector
<
phi
::
DenseTensor
>&
tensors
);
// Returns true when every tensor in `tensors` is placed on a custom device
// whose type name is `dev_type`; returns false as soon as one tensor is not.
// NOTE(review): the comparison is delegated to
// platform::places_are_same_class in the definition; confirm whether it also
// distinguishes device ids.
bool
CheckTensorsInCustomPlace
(
const
std
::
vector
<
phi
::
DenseTensor
>&
tensors
,
const
std
::
string
&
dev_type
);
}
// namespace distributed
}
// namespace distributed
}
// namespace paddle
}
// namespace paddle
paddle/fluid/distributed/collective/ProcessGroupCustom.cc
浏览文件 @
307ad60d
...
@@ -207,10 +207,111 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Collective(
...
@@ -207,10 +207,111 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Collective(
return
task
;
return
task
;
}
}
// All-gather for the custom-device process group.
//
// Both tensor lists must live on CustomPlace(device_type_); otherwise an
// InvalidArgument error is raised before any communication is issued. For
// each input/output pair handed back by Collective, the whole input buffer
// (input.numel() elements of input.dtype()) is passed to
// phi::DeviceManager::CCLAllGather together with the output buffer.
//
// Returns the task produced by Collective for CommType::ALLGATHER.
std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::AllGather(
    std::vector<phi::DenseTensor>& in_tensors,
    std::vector<phi::DenseTensor>& out_tensors) {
  PADDLE_ENFORCE_EQ(
      CheckTensorsInCustomPlace(in_tensors, device_type_),
      true,
      platform::errors::InvalidArgument(
          "All inputs should be in CustomPlace(%s).", device_type_));
  PADDLE_ENFORCE_EQ(
      CheckTensorsInCustomPlace(out_tensors, device_type_),
      true,
      platform::errors::InvalidArgument(
          "All outputs should be in CustomPlace(%s).", device_type_));

  // Per-tensor communication routine invoked by Collective.
  auto gather_whole_tensor = [&](phi::DenseTensor& src,
                                 phi::DenseTensor& dst,
                                 phi::ccl::CCLComm ccl_comm,
                                 const phi::stream::Stream& ccl_stream) {
    return phi::DeviceManager::CCLAllGather(
        device_type_,
        src.data(),
        dst.data(),
        src.numel(),
        phi::ccl::ToCCLDataType(src.dtype()),
        ccl_comm,
        ccl_stream);
  };

  return Collective(
      in_tensors, out_tensors, gather_whole_tensor, CommType::ALLGATHER);
}
// Advances `raw_pointer` by `offset` ELEMENTS (not bytes) of data type `type`
// and returns the resulting address.
//
// Supported types and the element width used for each:
//   FLOAT32 -> float, FLOAT64 -> double, INT32 -> int32_t,
//   INT64 -> int64_t, FLOAT16 -> int16_t (same width as a half float).
// Any other type raises platform::errors::Unimplemented.
void* XcclGetPointerByOffset(void* raw_pointer,
                             size_t offset,
                             experimental::DataType type) {
  size_t element_bytes = 0;
  switch (type) {
    case experimental::DataType::FLOAT32:
      element_bytes = sizeof(float);
      break;
    case experimental::DataType::FLOAT64:
      element_bytes = sizeof(double);
      break;
    case experimental::DataType::INT32:
      element_bytes = sizeof(int32_t);
      break;
    case experimental::DataType::INT64:
      element_bytes = sizeof(int64_t);
      break;
    case experimental::DataType::FLOAT16:
      // Half floats are stepped over as 16-bit integers.
      element_bytes = sizeof(int16_t);
      break;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "This datatype in xccl is not supported."));
      return nullptr;
  }
  // Byte-wise advance; lands on the same address as typed pointer arithmetic.
  return reinterpret_cast<void*>(reinterpret_cast<char*>(raw_pointer) +
                                 offset * element_bytes);
}
// Partial all-gather for the custom-device process group.
//
// Both tensor lists must live on CustomPlace(device_type_); otherwise an
// InvalidArgument error is raised before any communication is issued. For
// each input/output pair handed back by Collective, the send buffer starts
// `offset` elements into the input tensor (see XcclGetPointerByOffset) and
// `length` is forwarded to phi::DeviceManager::CCLAllGather as the count.
// NOTE(review): `offset` and `length` are signed ints that are converted
// implicitly where an unsigned size is expected; callers are presumably
// expected to pass non-negative values — confirm.
//
// Returns the task produced by Collective for CommType::ALLGATHER.
std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::AllGather_Partial(
    std::vector<phi::DenseTensor>& in_tensors,
    std::vector<phi::DenseTensor>& out_tensors,
    int offset,
    int length) {
  PADDLE_ENFORCE_EQ(
      CheckTensorsInCustomPlace(in_tensors, device_type_),
      true,
      platform::errors::InvalidArgument(
          "All inputs should be in CustomPlace(%s).", device_type_));
  PADDLE_ENFORCE_EQ(
      CheckTensorsInCustomPlace(out_tensors, device_type_),
      true,
      platform::errors::InvalidArgument(
          "All outputs should be in CustomPlace(%s).", device_type_));

  // Per-tensor communication routine invoked by Collective.
  auto gather_slice = [&](phi::DenseTensor& src,
                          phi::DenseTensor& dst,
                          phi::ccl::CCLComm ccl_comm,
                          const phi::stream::Stream& ccl_stream) {
    return phi::DeviceManager::CCLAllGather(
        device_type_,
        XcclGetPointerByOffset(src.data(), offset, src.dtype()),
        dst.data(),
        length,
        phi::ccl::ToCCLDataType(src.dtype()),
        ccl_comm,
        ccl_stream);
  };

  return Collective(in_tensors, out_tensors, gather_slice, CommType::ALLGATHER);
}
std
::
shared_ptr
<
ProcessGroup
::
Task
>
ProcessGroupCustom
::
AllReduce
(
std
::
shared_ptr
<
ProcessGroup
::
Task
>
ProcessGroupCustom
::
AllReduce
(
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
// NOLINT
const
AllreduceOptions
&
opts
)
{
const
AllreduceOptions
&
opts
)
{
PADDLE_ENFORCE_EQ
(
CheckTensorsInCustomPlace
(
in_tensors
,
device_type_
),
true
,
platform
::
errors
::
InvalidArgument
(
"All inputs should be in CustomPlace(%s)."
,
device_type_
));
PADDLE_ENFORCE_EQ
(
CheckTensorsInCustomPlace
(
out_tensors
,
device_type_
),
true
,
platform
::
errors
::
InvalidArgument
(
"All outputs should be in CustomPlace(%s)."
,
device_type_
));
return
Collective
(
return
Collective
(
in_tensors
,
in_tensors
,
out_tensors
,
out_tensors
,
...
@@ -235,6 +336,16 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Broadcast(
...
@@ -235,6 +336,16 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Broadcast(
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
// NOLINT
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
// NOLINT
const
BroadcastOptions
&
opts
)
{
const
BroadcastOptions
&
opts
)
{
PADDLE_ENFORCE_EQ
(
CheckTensorsInCustomPlace
(
in_tensors
,
device_type_
),
true
,
platform
::
errors
::
InvalidArgument
(
"All inputs should be in CustomPlace(%s)."
,
device_type_
));
PADDLE_ENFORCE_EQ
(
CheckTensorsInCustomPlace
(
out_tensors
,
device_type_
),
true
,
platform
::
errors
::
InvalidArgument
(
"All outputs should be in CustomPlace(%s)."
,
device_type_
));
return
Collective
(
return
Collective
(
in_tensors
,
in_tensors
,
out_tensors
,
out_tensors
,
...
...
paddle/fluid/distributed/collective/ProcessGroupCustom.h
浏览文件 @
307ad60d
...
@@ -73,6 +73,16 @@ class ProcessGroupCustom : public ProcessGroup {
...
@@ -73,6 +73,16 @@ class ProcessGroupCustom : public ProcessGroup {
return
"XCCL_"
+
device_type_
;
return
"XCCL_"
+
device_type_
;
}
}
// All-gather over this custom-device process group: sends each tensor of
// `in_tensors` in full and collects the result in the matching tensor of
// `out_tensors`. Both lists must be placed on this group's custom device
// type; the definition raises InvalidArgument otherwise. Returns the task
// that tracks the collective.
std
::
shared_ptr
<
ProcessGroup
::
Task
>
AllGather
(
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
)
override
;
// All-gather of a slice of each input tensor: the send buffer begins `offset`
// elements into the input and `length` is passed on as the count for the
// underlying CCL all-gather. Both tensor lists must be placed on this group's
// custom device type; the definition raises InvalidArgument otherwise.
// Returns the task that tracks the collective.
// NOTE(review): presumably `offset` and `length` must be non-negative and
// within the input tensor; the definition does not check this — confirm with
// callers.
std
::
shared_ptr
<
ProcessGroup
::
Task
>
AllGather_Partial
(
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
int
offset
,
int
length
)
override
;
std
::
shared_ptr
<
ProcessGroup
::
Task
>
AllReduce
(
std
::
shared_ptr
<
ProcessGroup
::
Task
>
AllReduce
(
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
std
::
vector
<
phi
::
DenseTensor
>&
in_tensors
,
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
std
::
vector
<
phi
::
DenseTensor
>&
out_tensors
,
...
...
paddle/phi/backends/custom/custom_device.cc
浏览文件 @
307ad60d
...
@@ -705,6 +705,7 @@ class CustomDevice : public DeviceInterface {
...
@@ -705,6 +705,7 @@ class CustomDevice : public DeviceInterface {
size_t
num
,
size_t
num
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLReduceOp
reduce_op
,
ccl
::
CCLReduceOp
reduce_op
,
size_t
root_id
,
const
ccl
::
CCLComm
&
comm
,
const
ccl
::
CCLComm
&
comm
,
const
stream
::
Stream
&
stream
)
override
{
const
stream
::
Stream
&
stream
)
override
{
CHECK_PTR
(
pimpl_
->
xccl_reduce
);
CHECK_PTR
(
pimpl_
->
xccl_reduce
);
...
@@ -714,6 +715,7 @@ class CustomDevice : public DeviceInterface {
...
@@ -714,6 +715,7 @@ class CustomDevice : public DeviceInterface {
num
,
num
,
ToXCCLDataType
(
data_type
),
ToXCCLDataType
(
data_type
),
ToXCCLReduceOp
(
reduce_op
),
ToXCCLReduceOp
(
reduce_op
),
root_id
,
reinterpret_cast
<
C_CCLComm
>
(
comm
),
reinterpret_cast
<
C_CCLComm
>
(
comm
),
reinterpret_cast
<
C_Stream
>
(
stream
.
raw_stream
())));
reinterpret_cast
<
C_Stream
>
(
stream
.
raw_stream
())));
}
}
...
...
paddle/phi/backends/custom/custom_device_test.cc
浏览文件 @
307ad60d
...
@@ -203,6 +203,7 @@ void TestCustomCCL(const paddle::platform::Place& place) {
...
@@ -203,6 +203,7 @@ void TestCustomCCL(const paddle::platform::Place& place) {
0
,
0
,
phi
::
ccl
::
CCLDataType
::
CCL_DATA_TYPE_FP32
,
phi
::
ccl
::
CCLDataType
::
CCL_DATA_TYPE_FP32
,
phi
::
ccl
::
CCLReduceOp
::
SUM
,
phi
::
ccl
::
CCLReduceOp
::
SUM
,
0
,
comm
,
comm
,
stream
);
stream
);
phi
::
DeviceManager
::
CCLAllGather
(
dev_type
,
phi
::
DeviceManager
::
CCLAllGather
(
dev_type
,
...
...
paddle/phi/backends/custom/fake_cpu_device.h
浏览文件 @
307ad60d
...
@@ -170,6 +170,7 @@ C_Status XcclReduce(void *send_buf,
...
@@ -170,6 +170,7 @@ C_Status XcclReduce(void *send_buf,
size_t
count
,
size_t
count
,
C_DataType
data_type
,
C_DataType
data_type
,
C_CCLReduceOp
op
,
C_CCLReduceOp
op
,
size_t
root_id
,
C_CCLComm
comm
,
C_CCLComm
comm
,
C_Stream
stream
)
{
C_Stream
stream
)
{
return
C_SUCCESS
;
return
C_SUCCESS
;
...
...
paddle/phi/backends/device_base.cc
浏览文件 @
307ad60d
...
@@ -309,6 +309,7 @@ void DeviceInterface::CCLReduce(void* in_data,
...
@@ -309,6 +309,7 @@ void DeviceInterface::CCLReduce(void* in_data,
size_t
num
,
size_t
num
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLReduceOp
reduce_op
,
ccl
::
CCLReduceOp
reduce_op
,
size_t
root_id
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
stream
::
Stream
&
stream
)
{
const
stream
::
Stream
&
stream
)
{
INTERFACE_UNIMPLEMENT
;
INTERFACE_UNIMPLEMENT
;
...
...
paddle/phi/backends/device_base.h
浏览文件 @
307ad60d
...
@@ -195,6 +195,7 @@ class DeviceInterface { // Driver / Runtime
...
@@ -195,6 +195,7 @@ class DeviceInterface { // Driver / Runtime
size_t
num
,
size_t
num
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLReduceOp
reduce_op
,
ccl
::
CCLReduceOp
reduce_op
,
size_t
root_id
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
stream
::
Stream
&
stream
);
const
stream
::
Stream
&
stream
);
virtual
void
CCLAllGather
(
void
*
in_data
,
virtual
void
CCLAllGather
(
void
*
in_data
,
...
...
paddle/phi/backends/device_ext.h
浏览文件 @
307ad60d
...
@@ -593,6 +593,7 @@ struct C_DeviceInterface {
...
@@ -593,6 +593,7 @@ struct C_DeviceInterface {
size_t
count
,
size_t
count
,
C_DataType
data_type
,
C_DataType
data_type
,
C_CCLReduceOp
op
,
C_CCLReduceOp
op
,
size_t
root
,
C_CCLComm
comm
,
C_CCLComm
comm
,
C_Stream
stream
);
C_Stream
stream
);
...
...
paddle/phi/backends/device_manager.cc
浏览文件 @
307ad60d
...
@@ -536,11 +536,12 @@ void DeviceManager::CCLReduce(const std::string& device_type,
...
@@ -536,11 +536,12 @@ void DeviceManager::CCLReduce(const std::string& device_type,
size_t
num
,
size_t
num
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLReduceOp
reduce_op
,
ccl
::
CCLReduceOp
reduce_op
,
size_t
root_id
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
stream
::
Stream
&
stream
)
{
const
stream
::
Stream
&
stream
)
{
auto
dev_impl
=
GetDeviceInterfaceWithType
(
device_type
);
auto
dev_impl
=
GetDeviceInterfaceWithType
(
device_type
);
dev_impl
->
CCLReduce
(
dev_impl
->
CCLReduce
(
in_data
,
out_data
,
num
,
data_type
,
reduce_op
,
ccl_comm
,
stream
);
in_data
,
out_data
,
num
,
data_type
,
reduce_op
,
root_id
,
ccl_comm
,
stream
);
}
}
void
DeviceManager
::
CCLAllGather
(
const
std
::
string
&
device_type
,
void
DeviceManager
::
CCLAllGather
(
const
std
::
string
&
device_type
,
...
...
paddle/phi/backends/device_manager.h
浏览文件 @
307ad60d
...
@@ -206,6 +206,7 @@ class DeviceManager {
...
@@ -206,6 +206,7 @@ class DeviceManager {
size_t
num
,
size_t
num
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLDataType
data_type
,
ccl
::
CCLReduceOp
reduce_op
,
ccl
::
CCLReduceOp
reduce_op
,
size_t
root_id
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
ccl
::
CCLComm
&
ccl_comm
,
const
stream
::
Stream
&
stream
);
const
stream
::
Stream
&
stream
);
static
void
CCLAllGather
(
const
std
::
string
&
device_type
,
static
void
CCLAllGather
(
const
std
::
string
&
device_type
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录