MegEngine 天元 / MegEngine
Commit ea8eb4cf
Authored January 13, 2021 by Megvii Engine Team
feat(mge/distributed): scalar support for distributed functions
GitOrigin-RevId: 53f3575baf58d709d752618e90cdec6f93b631e5
Parent: b83c77e1
Showing 2 changed files with 41 additions and 22 deletions:

imperative/python/megengine/distributed/functional.py  (+20, -1)
imperative/python/test/unit/functional/test_functional_distributed.py  (+21, -21)
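In short, collectives and remote send/recv now preserve 0-dim ("scalar") tensors instead of returning shape-(1,) results. A minimal usage sketch of the new behavior (a hypothetical two-GPU run; the rank-derived values and the ndim check are illustrative, not taken from this commit):

    import numpy as np
    import megengine.distributed as dist
    from megengine import tensor
    from megengine.distributed.functional import all_reduce_sum

    @dist.launcher(n_gpus=2)  # assumes two visible GPUs, as in the tests below
    def worker():
        x = tensor(np.float32(dist.get_rank() + 1))  # 0-dim input
        y = all_reduce_sum(x)
        # With this change the scalar flag survives the collective:
        # y is still 0-dim and equals 3.0 on both ranks.
        assert y.ndim == 0

    worker()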
imperative/python/megengine/distributed/functional.py

@@ -11,6 +11,7 @@ from typing import Optional, Tuple
 from ..core._imperative_rt.core2 import apply
 from ..core.autodiff.grad import _grad_manager_dict
 from ..core.ops.builtin import CollectiveComm, Copy, PyOpBase, RemoteRecv, RemoteSend
+from ..core.tensor.utils import isscalar, setscalar
 from ..device import get_default_device
 from ..tensor import Tensor
 from .group import WORLD, Group, get_backend, get_client, get_mm_server_addr, get_rank

@@ -50,7 +51,18 @@ def collective_comm(inp, mode, group, device):
         backend=get_backend(),
         comp_node=device,
     )
-    return apply(op, inp)[0]
+    (result,) = apply(op, inp)
+    # assume all workers have homogeneous shape
+    if mode in (
+        CollectiveComm.Mode.REDUCE_SUM,
+        CollectiveComm.Mode.BROADCAST,
+        CollectiveComm.Mode.ALL_REDUCE_SUM,
+        CollectiveComm.Mode.ALL_REDUCE_MAX,
+        CollectiveComm.Mode.ALL_REDUCE_MIN,
+    ):
+        if isscalar(inp):
+            setscalar(result)
+    return result


 def reduce_sum(
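A note on the hunk above: `apply(op, inp)` only ever yields tensors of rank >= 1, so scalar-ness travels as a flag that must be re-attached to the result. A stripped-down sketch of that round-trip (`keep_scalar` and `apply_fn` are made-up names for illustration; `isscalar`/`setscalar` are the helpers the diff imports):

    from megengine.core.tensor.utils import isscalar, setscalar

    def keep_scalar(apply_fn, inp):
        # apply_fn stands in for `lambda t: apply(op, t)[0]`.
        result = apply_fn(inp)
        # Only modes whose output shape equals the input shape
        # (REDUCE_SUM, BROADCAST, ALL_REDUCE_*) can safely inherit
        # the flag; gather/scatter-style modes change the shape.
        if isscalar(inp):
            setscalar(result)
        return result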
@@ -289,6 +301,11 @@ def remote_recv(
             g.wrt(inp)
             g._refkeeper.append(inp)

+    _isscalar = False
+    if len(shape) == 0:
+        shape = (1,)
+        _isscalar = True
+
     op = RemoteRecv()
     op.key = key
     op.cn = device

@@ -298,4 +315,6 @@ def remote_recv(
     op.rank_from = src_rank
     (ret,) = apply(_RemoteRecv(op), inp)
+    if _isscalar:
+        setscalar(ret)
     return ret
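`RemoteRecv` needs a concrete, non-empty shape, so an empty `shape` is widened to `(1,)` for the transfer and the scalar flag is restored on the result. A hedged two-GPU sketch of the resulting behavior, mirroring the updated `test_io_remote` below:

    import numpy as np
    import megengine.distributed as dist
    from megengine import tensor
    from megengine.core._imperative_rt.core2 import sync
    from megengine.distributed.functional import remote_recv, remote_send

    @dist.launcher(n_gpus=2)  # assumes two visible GPUs
    def worker():
        if dist.get_rank() == 0:
            remote_send(tensor(np.float32(3.14), device="gpu0"), 1)
            sync()
        else:
            # shape=() takes the new scalar path: the payload travels
            # as shape (1,), then setscalar() restores the 0-dim view.
            y = remote_recv(0, (), np.float32)
            assert y.ndim == 0

    worker()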
imperative/python/test/unit/functional/test_functional_distributed.py

@@ -13,7 +13,7 @@ import pytest
 import megengine as mge
 import megengine.distributed as dist
-from megengine import Parameter, Tensor, tensor
+from megengine import Parameter, tensor
 from megengine.core._imperative_rt.core2 import sync
 from megengine.device import get_default_device, set_default_device
 from megengine.distributed.helper import get_device_count_by_fork
@@ -53,14 +53,14 @@ def test_reduce_sum():
         assert np.allclose(output.numpy(), 0)

     def check(shape):
-        x = np.random.rand(*shape).astype("float32")
-        y = np.random.rand(*shape).astype("float32")
+        x = np.random.rand(*shape)
+        y = np.random.rand(*shape)
         z = x + y
         data = (x, y)
         expect = (z, None)
         worker(data, expect)

-    for shape in [(2, 3), (8, 10), (99, 77)]:
+    for shape in [(), (1,), (2, 3), (8, 10), (99, 77)]:
         check(shape)
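Dropping the `.astype("float32")` calls here (and in the hunks below) is what makes the `()` case work: with no dimensions, `np.random.rand(*shape)` returns a plain Python float, which has no `astype` method, so the cast is left to the tensor constructor inside the worker. A quick demonstration:

    import numpy as np

    x = np.random.rand(*())   # no arguments -> a plain Python float
    assert isinstance(x, float) and not hasattr(x, "astype")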
@@ -81,13 +81,13 @@ def test_broadcast():
         assert np.allclose(output.numpy(), expect[rank])

     def check(shape):
-        x = np.random.rand(*shape).astype("float32")
+        x = np.random.rand(*shape)
         y = x + 1
         data = (x, y)
         expect = (x, x)
         worker(data, expect)

-    for shape in [(2, 3), (8, 10), (99, 77)]:
+    for shape in [(), (1,), (2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -164,14 +164,14 @@ def test_all_reduce_sum():
         assert np.allclose(output.numpy(), expect[rank])

     def check(shape):
-        x = np.random.rand(*shape).astype("float32")
-        y = np.random.rand(*shape).astype("float32")
+        x = np.random.rand(*shape)
+        y = np.random.rand(*shape)
         z = x + y
         data = (x, y)
         expect = (z, z)
         worker(data, expect)

-    for shape in [(2, 3), (8, 10), (99, 77)]:
+    for shape in [(), (1,), (2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -192,14 +192,14 @@ def test_all_reduce_max():
         assert np.allclose(output.numpy(), expect[rank])

     def check(shape):
-        x = np.random.rand(*shape).astype("float32")
-        y = np.random.rand(*shape).astype("float32")
+        x = np.random.rand(*shape)
+        y = np.random.rand(*shape)
         z = np.maximum(x, y)
         data = (x, y)
         expect = (z, z)
         worker(data, expect)

-    for shape in [(2, 3), (8, 10), (99, 77)]:
+    for shape in [(), (1,), (2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -220,14 +220,14 @@ def test_all_reduce_min():
         assert np.allclose(output.numpy(), expect[rank])

     def check(shape):
-        x = np.random.rand(*shape).astype("float32")
-        y = np.random.rand(*shape).astype("float32")
+        x = np.random.rand(*shape)
+        y = np.random.rand(*shape)
         z = np.minimum(x, y)
         data = (x, y)
         expect = (z, z)
         worker(data, expect)

-    for shape in [(2, 3), (8, 10), (99, 77)]:
+    for shape in [(), (1,), (2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -327,18 +327,18 @@ def test_all_to_all():
 @pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_io_remote():
-    val = np.random.rand(4, 5).astype(np.float32)
-
     @dist.launcher(n_gpus=2)
-    def worker():
+    def worker(val, shape):
         rank = dist.get_rank()
         if rank == 0:  # remote send
-            x = Tensor(val, device="gpu0")
+            x = tensor(val, device="gpu0")
             remote_send(x, 1)
             sync()
         else:  # remote recv
-            y = remote_recv(0, val.shape, val.dtype)
+            y = remote_recv(0, shape, np.float32)
             assert y.device == "gpu1"
             np.testing.assert_almost_equal(val, y.numpy())

-    worker()
+    for shape in [(), (1,), (4, 5)]:
+        val = np.random.rand(*shape)
+        worker(val, shape)
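A final note on the parametrized `test_io_remote`: the closing assertion works unchanged across all three shapes because `np.testing.assert_almost_equal` accepts plain Python floats and 0-dim arrays as readily as 2-D ones:

    import numpy as np

    # One assertion covers shapes (), (1,), and (4, 5):
    np.testing.assert_almost_equal(0.5, np.array(0.5))
    np.testing.assert_almost_equal(np.zeros((4, 5)), np.zeros((4, 5)))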