Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
bb2cb762
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bb2cb762
编写于
3月 18, 2022
作者:
L
lilong12
提交者:
GitHub
3月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use store for gloo process group (#40629)
上级
70726696
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
15 addition
and
30 deletion
+15
-30
paddle/fluid/distributed/collective/ProcessGroupGloo.cc
paddle/fluid/distributed/collective/ProcessGroupGloo.cc
+4
-4
paddle/fluid/distributed/collective/ProcessGroupGloo.h
paddle/fluid/distributed/collective/ProcessGroupGloo.h
+6
-7
paddle/fluid/pybind/distributed_py.cc
paddle/fluid/pybind/distributed_py.cc
+4
-16
python/paddle/fluid/tests/unittests/process_group_gloo.py
python/paddle/fluid/tests/unittests/process_group_gloo.py
+1
-3
未找到文件。
paddle/fluid/distributed/collective/ProcessGroupGloo.cc
浏览文件 @
bb2cb762
...
...
@@ -171,10 +171,10 @@ ProcessGroupGloo::GlooTask::GlooTask(int rank,
"Only CPU place is supported for ProcessGroupGloo."
));
}
ProcessGroupGloo
::
ProcessGroupGloo
(
const
std
::
shared_ptr
<
GlooStore
>&
store
,
int
rank
,
int
world_size
,
const
std
::
shared_ptr
<
GlooOptions
>
options
)
:
ProcessGroup
(
rank
,
world_size
),
_tag
(
0
),
_store
(
store
)
{
ProcessGroupGloo
::
ProcessGroupGloo
(
const
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>&
store
,
int
rank
,
int
world_size
,
const
std
::
shared_ptr
<
GlooOptions
>
options
)
:
ProcessGroup
(
rank
,
world_size
),
_tag
(
0
),
_store
(
new
GlooStore
(
store
)
)
{
_context
=
std
::
make_shared
<
gloo
::
rendezvous
::
Context
>
(
rank
,
world_size
);
auto
prefix_store
=
::
gloo
::
rendezvous
::
PrefixStore
(
std
::
to_string
(
0
),
*
_store
);
...
...
paddle/fluid/distributed/collective/ProcessGroupGloo.h
浏览文件 @
bb2cb762
...
...
@@ -52,8 +52,7 @@ class ProcessGroupGloo : public ProcessGroup {
class
GlooStore
:
public
::
gloo
::
rendezvous
::
Store
{
public:
explicit
GlooStore
(
const
std
::
shared_ptr
<
paddle
::
distributed
::
TCPStore
>&
store
)
explicit
GlooStore
(
const
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>&
store
)
:
_store
(
store
)
{}
~
GlooStore
()
=
default
;
...
...
@@ -87,7 +86,7 @@ class ProcessGroupGloo : public ProcessGroup {
}
protected:
std
::
shared_ptr
<
paddle
::
distributed
::
TCP
Store
>
_store
;
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>
_store
;
};
class
GlooOptions
{
...
...
@@ -100,9 +99,9 @@ class ProcessGroupGloo : public ProcessGroup {
std
::
shared_ptr
<::
gloo
::
transport
::
Device
>
device
;
};
explicit
ProcessGroupGloo
(
const
std
::
shared_ptr
<
GlooStore
>&
store
,
int
rank
,
int
world_size
,
std
::
shared_ptr
<
GlooOptions
>
options
);
explicit
ProcessGroupGloo
(
const
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>&
store
,
int
rank
,
int
world_size
,
std
::
shared_ptr
<
GlooOptions
>
options
);
~
ProcessGroupGloo
()
=
default
;
...
...
@@ -145,7 +144,7 @@ class ProcessGroupGloo : public ProcessGroup {
protected:
uint32_t
_tag
;
std
::
shared_ptr
<
gloo
::
rendezvous
::
Context
>
_context
;
std
::
shared_ptr
<
Gloo
Store
>
_store
;
std
::
shared_ptr
<
::
gloo
::
rendezvous
::
Store
>
_store
;
};
}
// namespace distributed
...
...
paddle/fluid/pybind/distributed_py.cc
浏览文件 @
bb2cb762
...
...
@@ -235,25 +235,13 @@ void BindDistributed(py::module *m) {
py
::
call_guard
<
py
::
gil_scoped_release
>
());
#if defined(PADDLE_WITH_GLOO)
py
::
class_
<
GlooOptions
>
(
*
m
,
"GlooOptions"
)
.
def
(
py
::
init
<>
())
.
def_readwrite
(
"_device"
,
&
GlooOptions
::
device
)
.
def_static
(
"create"
,
&
GlooOptions
::
create
);
py
::
class_
<
GlooStore
,
std
::
shared_ptr
<
GlooStore
>>
(
*
m
,
"GlooStore"
)
.
def
(
py
::
init
(
[](
const
std
::
shared_ptr
<
paddle
::
distributed
::
TCPStore
>
&
store
)
{
return
std
::
make_shared
<
GlooStore
>
(
store
);
}),
py
::
call_guard
<
py
::
gil_scoped_release
>
());
py
::
class_
<
ProcessGroupGloo
,
std
::
shared_ptr
<
ProcessGroupGloo
>>
(
*
m
,
"ProcessGroupGloo"
,
ProcessGroup
)
.
def
(
py
::
init
<
const
std
::
shared_ptr
<
GlooStore
>
&
,
int
,
int
,
std
::
shared_ptr
<
GlooOptions
>
&>
(),
.
def
(
py
::
init
<
const
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>
&
,
int
,
int
,
std
::
shared_ptr
<
GlooOptions
>
&>
(),
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
py
::
init
([](
const
std
::
shared_ptr
<
GlooStore
>
&
store
,
int
rank
,
int
world_size
)
{
.
def
(
py
::
init
([](
const
std
::
shared_ptr
<
paddle
::
distributed
::
Store
>
&
store
,
int
rank
,
int
world_size
)
{
auto
opts
=
GlooOptions
::
create
();
char
*
ifname
=
getenv
(
GLOO_SOCKET_IFNAME_ENV
.
c_str
());
if
(
ifname
&&
strlen
(
ifname
)
>
1
)
{
...
...
python/paddle/fluid/tests/unittests/process_group_gloo.py
浏览文件 @
bb2cb762
...
...
@@ -47,9 +47,7 @@ class TestProcessGroupFp32(unittest.TestCase):
is_master
=
True
if
rank
==
0
else
False
store
=
paddle
.
fluid
.
core
.
TCPStore
(
"127.0.0.1"
,
6172
,
is_master
,
nranks
,
datetime
.
timedelta
(
0
))
gloo_store
=
paddle
.
fluid
.
core
.
GlooStore
(
store
)
opt
=
paddle
.
fluid
.
core
.
GlooOptions
()
pg
=
paddle
.
fluid
.
core
.
ProcessGroupGloo
(
gloo_store
,
rank
,
nranks
)
pg
=
paddle
.
fluid
.
core
.
ProcessGroupGloo
(
store
,
rank
,
nranks
)
# test allreduce sum
# rank 0
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录