Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
1e18b57b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1e18b57b
编写于
4月 20, 2022
作者:
L
lilong12
提交者:
GitHub
4月 20, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update (#41762) (#41843)
上级
5ce7f48d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
19 addition
and
30 deletion
+19
-30
paddle/fluid/distributed/collective/ProcessGroupHeter.cc
paddle/fluid/distributed/collective/ProcessGroupHeter.cc
+17
-27
paddle/fluid/distributed/ps/service/heter_client.cc
paddle/fluid/distributed/ps/service/heter_client.cc
+2
-3
未找到文件。
paddle/fluid/distributed/collective/ProcessGroupHeter.cc
浏览文件 @
1e18b57b
...
...
@@ -103,11 +103,12 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
if
(
local_rank_
==
0
)
{
std
::
vector
<
phi
::
DenseTensor
>
cpu_tensors
;
cpu_tensors
.
reserve
(
in_tensors
.
size
());
phi
::
DenseTensor
cpu_tensor
;
for
(
size_t
i
=
0
;
i
<
in_tensors
.
size
();
i
++
)
{
auto
gpu_tensor
=
in_tensors
[
i
];
auto
cpu_tensor
=
cpu_tensors
[
i
];
cpu_tensor
.
Resize
(
gpu_tensor
.
dims
());
framework
::
TensorCopySync
(
gpu_tensor
,
platform
::
CPUPlace
(),
&
cpu_tensor
);
cpu_tensors
.
push_back
(
cpu_tensor
);
}
// Step3: do inter cluster allreduce
if
(
with_switch_
)
{
...
...
@@ -123,37 +124,32 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
framework
::
DataTypeSize
(
dense_cpu_tensor
.
dtype
()));
PADDLE_ENFORCE_EQ
(
ret
,
0
,
platform
::
errors
::
PreconditionNotMet
(
"Send to the switch module error."
));
phi
::
DenseTensorMeta
meta
=
phi
::
DenseTensorMeta
(
dense_cpu_tensor
.
dtype
(),
dense_cpu_tensor
.
dims
());
std
::
shared_ptr
<
phi
::
DenseTensor
>
dense_cpu_tensor2
=
std
::
make_shared
<
phi
::
DenseTensor
>
(
std
::
make_unique
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
())
.
get
(),
meta
);
dense_cpu_tensor2
->
ResizeAndAllocate
(
dense_cpu_tensor
.
dims
());
phi
::
DenseTensor
cpu_tensor2
;
cpu_tensor2
.
AllocateFrom
(
std
::
make_unique
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
())
.
get
(),
dense_cpu_tensor
.
dtype
(),
dense_cpu_tensor
.
numel
());
ret
=
client_
->
Recv
(
gid_
,
{
dense_cpu_tensor
.
name
()},
dense_cpu_tensor2
->
data
(),
dense_cpu_tensor2
->
numel
()
*
framework
::
DataTypeSize
(
dense_cpu_tensor2
->
dtype
()));
gid_
,
{
dense_cpu_tensor
.
name
()},
cpu_tensor2
.
data
(),
cpu_tensor2
.
numel
()
*
framework
::
DataTypeSize
(
cpu_tensor2
.
dtype
()));
PADDLE_ENFORCE_EQ
(
ret
,
0
,
platform
::
errors
::
PreconditionNotMet
(
"Recv from the switch module error."
));
switch
(
dense_cpu_tensor
.
dtype
())
{
case
DataType
::
FLOAT32
:
_do_add
<
float
>
(
reinterpret_cast
<
float
*>
(
dense_cpu_tensor
.
data
()),
reinterpret_cast
<
float
*>
(
dense_cpu_tensor2
->
data
()),
reinterpret_cast
<
float
*>
(
cpu_tensor2
.
data
()),
dense_cpu_tensor
.
numel
());
break
;
case
DataType
::
FLOAT64
:
_do_add
<
double
>
(
reinterpret_cast
<
double
*>
(
dense_cpu_tensor
.
data
()),
reinterpret_cast
<
double
*>
(
dense_cpu_tensor2
->
data
()),
dense_cpu_tensor
.
numel
());
_do_add
<
double
>
(
reinterpret_cast
<
double
*>
(
dense_cpu_tensor
.
data
()),
reinterpret_cast
<
double
*>
(
cpu_tensor2
.
data
()),
dense_cpu_tensor
.
numel
());
break
;
case
DataType
::
INT32
:
_do_add
<
int
>
(
reinterpret_cast
<
int
*>
(
dense_cpu_tensor
.
data
()),
reinterpret_cast
<
int
*>
(
dense_cpu_tensor2
->
data
()),
reinterpret_cast
<
int
*>
(
cpu_tensor2
.
data
()),
dense_cpu_tensor
.
numel
());
break
;
default:
...
...
@@ -205,9 +201,10 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
cpu_tensors
.
reserve
(
in_tensors
.
size
());
for
(
size_t
i
=
0
;
i
<
in_tensors
.
size
();
i
++
)
{
auto
gpu_tensor
=
in_tensors
[
i
];
auto
cpu_tensor
=
cpu_tensors
[
i
]
;
phi
::
DenseTensor
cpu_tensor
;
cpu_tensor
.
Resize
(
gpu_tensor
.
dims
());
framework
::
TensorCopySync
(
gpu_tensor
,
platform
::
CPUPlace
(),
&
cpu_tensor
);
cpu_tensors
.
push_back
(
cpu_tensor
);
}
if
(
with_switch_
)
{
if
(
local_rank_
==
0
)
{
...
...
@@ -232,13 +229,6 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
PADDLE_ENFORCE_EQ
(
ret
,
0
,
platform
::
errors
::
PreconditionNotMet
(
"Receive from the switch module error."
));
ret
=
client_
->
Recv
(
gid_
,
{
dense_cpu_tensor
.
name
()},
dense_cpu_tensor
.
data
(),
dense_cpu_tensor
.
numel
()
*
framework
::
DataTypeSize
(
dense_cpu_tensor
.
dtype
()));
PADDLE_ENFORCE_EQ
(
ret
,
0
,
platform
::
errors
::
PreconditionNotMet
(
"Receive from the switch module error."
));
}
}
}
else
{
...
...
paddle/fluid/distributed/ps/service/heter_client.cc
浏览文件 @
1e18b57b
...
...
@@ -286,8 +286,7 @@ int HeterClient::Send(int group_id, const std::vector<std::string>& var_names,
request
.
add_vars_len
(
var_len
);
}
auto
&
request_buffer
=
closure
->
cntl
.
request_attachment
();
request_buffer
.
append
(
reinterpret_cast
<
void
*>
(
data_ptr
),
data_size
*
sizeof
(
float
));
request_buffer
.
append
(
reinterpret_cast
<
void
*>
(
data_ptr
),
data_size
);
auto
promise
=
std
::
make_shared
<
std
::
promise
<
int32_t
>>
();
closure
->
add_promise
(
promise
);
std
::
future
<
int
>
fut
=
promise
->
get_future
();
...
...
@@ -387,7 +386,7 @@ int HeterClient::Recv(int group_id, const std::vector<std::string>& var_names,
if
(
xpu_channels_
.
size
()
<
2
)
{
LOG
(
ERROR
)
<<
"xpu_channels_ is null"
;
}
recv_switch_channels_
.
push_back
(
xpu_channels_
[
1
]);
recv_switch_channels_
.
push_back
(
xpu_channels_
[
0
]);
}
brpc
::
Channel
*
channel
=
recv_switch_channels_
[
0
].
get
();
::
paddle
::
distributed
::
PsService_Stub
stub
(
channel
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录