MegEngine 天元 / MegEngine
Commit 0b4a7679
Authored Jul 22, 2021 by Megvii Engine Team

feat(mge/distributed): enable uint8 for collective communication

GitOrigin-RevId: 3305c0cf14c4d213d303e59fb3a56ba4972a1244
Parent: a22b2cf4
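The commit touches two layers: the MegRay dtype mapping on the C++ side, so the communicator accepts Uint8 buffers, and the Python test suite, which is refactored so every collective is exercised over several dtypes, uint8 included. As a rough sketch of what this enables, modeled on the run_all_reduce_sum helper added in the test diff below (the import paths and the 2-GPU launcher follow the test file; this is an illustration, not part of the commit):

import numpy as np

import megengine.distributed as dist
from megengine import tensor
from megengine.functional.distributed import all_reduce_sum


def check_uint8_all_reduce(shape=(8, 10)):
    # Keep values small so the uint8 sum cannot wrap around.
    x = np.random.randint(0, 100, size=shape).astype("uint8")
    y = np.random.randint(0, 100, size=shape).astype("uint8")
    data = (x, y)
    expect = x + y

    @dist.launcher(n_gpus=2)
    def worker(data, expect):
        rank = dist.get_rank()
        inp = tensor(data[rank])
        output = all_reduce_sum(inp)  # uint8 tensors are accepted after this commit
        assert np.allclose(output.numpy(), expect)

    worker(data, expect)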
Showing 2 changed files with 162 additions and 39 deletions:

  imperative/python/test/unit/functional/test_functional_distributed.py  (+160, -39)
  src/opr-mm/impl/megray_helper.cpp  (+2, -0)
imperative/python/test/unit/functional/test_functional_distributed.py
...
@@ -32,10 +32,7 @@ from megengine.functional.distributed import (
 )


-@pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
-@pytest.mark.isolated_distributed
-def test_reduce_sum(shape):
+def run_reduce_sum(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -46,8 +43,8 @@ def test_reduce_sum(shape):
         else:
             assert output is None

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = x + y
     data = (x, y)
     expect = (z, None)
...
@@ -57,7 +54,18 @@ def test_reduce_sum(shape):
 @pytest.mark.require_ngpu(2)
 @pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_broadcast(shape):
+def test_reduce_sum_multishape(shape):
+    run_reduce_sum(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_reduce_sum_multidtype(dtype):
+    run_reduce_sum((8, 10), dtype)
+
+
+def run_broadcast(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
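The same refactoring is applied to every collective below: the original shape-parametrized test body becomes a run_<op>(shape, dtype) helper, and two thin wrappers cover the shape sweep at float32 and the dtype sweep (now including uint8) at a fixed (8, 10) shape. Spelled out for reduce_sum, the two parametrize stacks above expand to roughly the following cases (illustration only; the helper itself is the one defined in the first hunk):

# Coverage generated by the two wrappers for reduce_sum (and, analogously,
# for every other collective in this file).
SHAPES = [(), (1,), (2, 3), (8, 10), (99, 77)]
DTYPES = ["float32", "int32", "int8", "uint8"]

cases = [(shape, "float32") for shape in SHAPES]   # test_reduce_sum_multishape
cases += [((8, 10), dtype) for dtype in DTYPES]    # test_reduce_sum_multidtype
# Each (shape, dtype) pair runs as run_reduce_sum(shape, dtype) in its own
# isolated 2-GPU launch.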
@@ -65,7 +73,7 @@ def test_broadcast(shape):
         output = broadcast(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
     y = x + 1
     data = (x, y)
     expect = (x, x)
...
@@ -73,9 +81,20 @@ def test_broadcast(shape):


 @pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(1,), (2, 3), (8, 10), (99, 77)], ids=str)
+@pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_all_gather(shape):
+def test_broadcast_multishape(shape):
+    run_broadcast(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_broadcast_multidtype(dtype):
+    run_broadcast((8, 10), dtype)
+
+
+def run_all_gather(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -83,8 +102,8 @@ def test_all_gather(shape):
         output = all_gather(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = np.concatenate((x, y))
     data = (x, y)
     expect = (z, z)
...
@@ -92,9 +111,20 @@ def test_all_gather(shape):


 @pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (88, 44)], ids=str)
+@pytest.mark.parametrize("shape", [(1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_reduce_scatter_sum(shape):
+def test_all_gather_multishape(shape):
+    run_all_gather(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_all_gather_multidtype(dtype):
+    run_all_gather((8, 10), dtype)
+
+
+def run_reduce_scatter_sum(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -102,8 +132,8 @@ def test_reduce_scatter_sum(shape):
         output = reduce_scatter_sum(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = x + y
     data = (x, y)
     expect = (z[: shape[0] // 2], z[shape[0] // 2 :])
...
@@ -111,9 +141,20 @@ def test_reduce_scatter_sum(shape):


 @pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
+@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (88, 44)], ids=str)
 @pytest.mark.isolated_distributed
-def test_all_reduce_sum(shape):
+def test_reduce_scatter_sum_multishape(shape):
+    run_reduce_scatter_sum(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_reduce_scatter_sum_multidtype(dtype):
+    run_reduce_scatter_sum((8, 10), dtype)
+
+
+def run_all_reduce_sum(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -121,8 +162,8 @@ def test_all_reduce_sum(shape):
         output = all_reduce_sum(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = x + y
     data = (x, y)
     expect = (z, z)
...
@@ -132,7 +173,18 @@ def test_all_reduce_sum(shape):
 @pytest.mark.require_ngpu(2)
 @pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_all_reduce_max(shape):
+def test_all_reduce_sum_multishape(shape):
+    run_all_reduce_sum(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_all_reduce_sum_multidtype(dtype):
+    run_all_reduce_sum((8, 10), dtype)
+
+
+def run_all_reduce_max(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -140,8 +192,8 @@ def test_all_reduce_max(shape):
         output = all_reduce_max(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = np.maximum(x, y)
     data = (x, y)
     expect = (z, z)
...
@@ -151,7 +203,18 @@ def test_all_reduce_max(shape):
 @pytest.mark.require_ngpu(2)
 @pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_all_reduce_min(shape):
+def test_all_reduce_max_multishape(shape):
+    run_all_reduce_max(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_all_reduce_max_multidtype(dtype):
+    run_all_reduce_max((8, 10), dtype)
+
+
+def run_all_reduce_min(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -159,8 +222,8 @@ def test_all_reduce_min(shape):
         output = all_reduce_min(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = np.minimum(x, y)
     data = (x, y)
     expect = (z, z)
...
@@ -168,9 +231,20 @@ def test_all_reduce_min(shape):


 @pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (99, 77)], ids=str)
+@pytest.mark.parametrize("shape", [(), (1,), (2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_gather(shape):
+def test_all_reduce_min_multishape(shape):
+    run_all_reduce_min(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_all_reduce_min_multidtype(dtype):
+    run_all_reduce_min((8, 10), dtype)
+
+
+def run_gather(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -181,8 +255,8 @@ def test_gather(shape):
         else:
             assert output is None

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     z = np.concatenate((x, y))
     data = (x, y)
     expect = (z, None)
...
@@ -190,9 +264,20 @@ def test_gather(shape):


 @pytest.mark.require_ngpu(2)
-@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (100, 77)], ids=str)
+@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (99, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_scatter(shape):
+def test_gather_multishape(shape):
+    run_gather(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_gather_multidtype(dtype):
+    run_gather((8, 10), dtype)
+
+
+def run_scatter(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -200,7 +285,7 @@ def test_scatter(shape):
         output = scatter(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
     y = x + 1
     data = (x, y)
     expect = (x[: shape[0] // 2], x[shape[0] // 2 :])
...
@@ -210,7 +295,18 @@ def test_scatter(shape):
 @pytest.mark.require_ngpu(2)
 @pytest.mark.parametrize("shape", [(2, 3), (8, 10), (100, 77)], ids=str)
 @pytest.mark.isolated_distributed
-def test_all_to_all(shape):
+def test_scatter_multishape(shape):
+    run_scatter(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_scatter_multidtype(dtype):
+    run_scatter((8, 10), dtype)
+
+
+def run_all_to_all(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(data, expect):
         rank = dist.get_rank()
...
@@ -218,8 +314,8 @@ def test_all_to_all(shape):
         output = all_to_all(inp)
         assert np.allclose(output.numpy(), expect[rank])

-    x = np.random.random_sample(shape).astype("float32")
-    y = np.random.random_sample(shape).astype("float32")
+    x = np.random.random_sample(shape).astype(dtype)
+    y = np.random.random_sample(shape).astype(dtype)
     a = np.concatenate((x[: shape[0] // 2], y[: shape[0] // 2]))
     b = np.concatenate((x[shape[0] // 2 :], y[shape[0] // 2 :]))
     data = (x, y)
...
@@ -228,9 +324,20 @@ def test_all_to_all(shape):


 @pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("shape", [(2, 3), (8, 10), (100, 77)], ids=str)
 @pytest.mark.isolated_distributed
-@pytest.mark.parametrize("shape", [(), (1,), (4, 5)], ids=str)
-def test_io_remote(shape):
+def test_all_to_all_multishape(shape):
+    run_all_to_all(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+@pytest.mark.isolated_distributed
+def test_all_to_all_multidtype(dtype):
+    run_all_to_all((8, 10), dtype)
+
+
+def run_io_remote(shape, dtype):
     @dist.launcher(n_gpus=2)
     def worker(val, shape):
         rank = dist.get_rank()
...
@@ -243,10 +350,24 @@ def test_io_remote(shape):
         assert y.device == get_default_device()
         np.testing.assert_almost_equal(val, y.numpy())

-    val = np.random.random_sample(shape).astype("float32")
+    val = np.random.random_sample(shape).astype(dtype)
     worker(val, shape)


+@pytest.mark.require_ngpu(2)
+@pytest.mark.isolated_distributed
+@pytest.mark.parametrize("shape", [(), (1,), (4, 5)], ids=str)
+def test_io_remote_multishape(shape):
+    run_io_remote(shape, "float32")
+
+
+@pytest.mark.require_ngpu(2)
+@pytest.mark.isolated_distributed
+@pytest.mark.parametrize("dtype", ["float32", "int32", "int8", "uint8"], ids=str)
+def test_io_remote_multidtype(dtype):
+    run_io_remote((8, 10), dtype)
+
+
 @pytest.mark.require_ngpu(2)
 def test_cuda_init_before_fork():
     a = mge.tensor(1, device="gpu0")
...
src/opr-mm/impl/megray_helper.cpp
...
@@ -20,6 +20,8 @@ MegRay::DType mgb::opr::get_megray_dtype(megdnn::DType dtype) {
     switch (dtype.enumv()) {
         case DTypeEnum::Int8:
             return MegRay::DType::MEGRAY_INT8;
+        case DTypeEnum::Uint8:
+            return MegRay::DType::MEGRAY_UINT8;
         case DTypeEnum::Int32:
             return MegRay::DType::MEGRAY_INT32;
         case DTypeEnum::Float32:
...
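The C++ change is the whole backend story: get_megray_dtype() translates MegDNN dtypes into MegRay's enum before a collective is issued, and Uint8 simply gains an entry. Conceptually the mapping after this commit looks like the table below (a Python paraphrase for illustration only; the real switch lives in megray_helper.cpp and returns MegRay::DType values, and the Float32 arm continues past the end of the hunk shown above):

# Paraphrase of get_megray_dtype() after this commit; not MegEngine API.
MEGRAY_DTYPE = {
    "Int8": "MEGRAY_INT8",
    "Uint8": "MEGRAY_UINT8",      # new in this commit
    "Int32": "MEGRAY_INT32",
    "Float32": "MEGRAY_FLOAT32",  # assumed; its return line is outside the hunk
}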