MegEngine 天元 / MegEngine · Commit 9d928e7f
Authored on Dec 26, 2020 by Megvii Engine Team

refactor(mge/distributed): sync interpreter for distributed launcher

GitOrigin-RevId: 8a88c272a1eae8e633eb68dcbb77a00a1c943f0e
Parent: 4e9be159

Showing 8 changed files with 199 additions and 312 deletions (+199, -312)
imperative/python/megengine/distributed/launcher.py                      +69   -23
imperative/python/test/integration/test_dp_correctness.py                 +0    -1
imperative/python/test/integration/test_param_pack.py                     +0    -1
imperative/python/test/unit/autodiff/test_grad_manger.py                  +0    -2
imperative/python/test/unit/core/test_autodiff.py                        +25   -42
imperative/python/test/unit/functional/test_functional_distributed.py    +96  -218
imperative/python/test/unit/module/test_batchnorm.py                      +9   -24
imperative/python/test/unit/quantization/test_observer.py                 +0    -1
imperative/python/megengine/distributed/launcher.py
@@ -6,59 +6,105 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import functools
 import multiprocessing as mp
 
-from .group import init_process_group
+from ..core._imperative_rt import sync
+from .group import group_barrier, init_process_group
 from .helper import get_device_count_by_fork
 from .server import Server
 from .util import get_free_ports
 
 
-def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs):
+def _run_wrapped(
+    func, is_multimachine, master_ip, port, world_size, rank, dev, args, kwargs
+):
     """Init distributed process group and run wrapped function."""
     init_process_group(
         master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev
     )
+    if is_multimachine:
+        group_barrier()
     func(*args, **kwargs)
+    sync()
+    if is_multimachine:
+        group_barrier()
 
 
-def launcher(func):
-    """Decorator for launching multiple processes in single-machine multi-gpu training."""
+class launcher:
+    """Decorator for launching multiple processes in single-machine multi-gpu training.
 
-    n_gpus = get_device_count_by_fork("gpu")
+    :param func: the function you want to launch in distributed mode.
+    :param n_gpus: how many devices each node.
+    :param world_size: how many devices totally.
+    :param rank_start: start number for rank.
+    :param master_ip: ip address for master node (where the rank 0 is).
+    :param port: server port for distributed server.
+    """
 
-    def wrapper(*args, **kwargs):
-        master_ip = "localhost"
-        server = Server()
-        port = server.py_server_port
+    def __new__(cls, *args, **kwargs):
+        if not args:
+            return functools.partial(cls, **kwargs)
+        return super().__new__(cls)
+
+    def __init__(
+        self,
+        func,
+        n_gpus=None,
+        world_size=None,
+        rank_start=0,
+        master_ip="localhost",
+        port=0,
+    ):
+        self.func = func
+        self.n_gpus = n_gpus if n_gpus is not None else get_device_count_by_fork("gpu")
+        self.world_size = world_size if world_size is not None else self.n_gpus
+        self.rank_start = rank_start
+        self.master_ip = master_ip
+        self.port = port
+        # master node create server
+        if self.rank_start == 0:
+            self.server = Server(self.port)
+            self.port = self.server.py_server_port
+        else:
+            assert self.port != 0, "you have to assign a port for distributed server"
 
+    def __call__(self, *args, **kwargs):
         procs = []
-        for rank in range(n_gpus):
+        for dev in range(self.n_gpus):
             p = mp.Process(
                 target=_run_wrapped,
-                args=(func, master_ip, port, n_gpus, rank, rank, args, kwargs),
+                args=(
+                    self.func,
+                    self.world_size > self.n_gpus,
+                    self.master_ip,
+                    self.port,
+                    self.world_size,
+                    dev + self.rank_start,
+                    dev,
+                    args,
+                    kwargs,
+                ),
             )
             p.start()
             procs.append(p)
 
-        ranks = [rank for rank in range(n_gpus)]
+        devs = list(range(self.n_gpus))
 
-        while len(ranks) > 0:
+        while len(devs) > 0:
             left = []
             # check all processes in one second
-            time_to_wait = 1.0 / len(ranks)
-            for rank in ranks:
-                procs[rank].join(time_to_wait)
-                code = procs[rank].exitcode
+            time_to_wait = 1.0 / len(devs)
+            for dev in devs:
+                procs[dev].join(time_to_wait)
+                code = procs[dev].exitcode
                 # terminate processes if one of them has failed
                 if code != 0 and code != None:
-                    for i in ranks:
+                    for i in devs:
                         procs[i].terminate()
                 assert (
                     code == 0 or code == None
-                ), "subprocess {} exit with code {}".format(rank, code)
+                ), "subprocess {} exit with code {}".format(
+                    dev + self.rank_start, code
+                )
                 if code == None:
-                    left.append(rank)
-            ranks = left
-
-    return wrapper
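Usage note: with this change `launcher` is a class instead of a closure, and the `__new__` override returns `functools.partial(cls, **kwargs)` when it is called with keyword arguments only, so both the bare `@dist.launcher` form and the configured `@dist.launcher(n_gpus=...)` form used by the refactored tests keep working. The sketch below is illustrative only and is not part of this commit; it assumes a machine with at least two visible GPUs, and the demo function names and tensor values are made up for the example.

import numpy as np

import megengine.distributed as dist
import megengine.functional.distributed as fdist
from megengine import tensor


# Bare decorator form: one process per local GPU, server and port handled internally.
@dist.launcher
def all_reduce_demo():
    rank = dist.get_rank()
    x = tensor(np.full((2, 2), rank, dtype="float32"))
    print(rank, fdist.all_reduce_sum(x).numpy())


# Configured form, as in the refactored tests: exactly two processes,
# each picking its own slice of the broadcast arguments by rank.
@dist.launcher(n_gpus=2)
def worker(data):
    rank = dist.get_rank()
    inp = tensor(data[rank])
    print(rank, fdist.all_reduce_sum(inp).numpy())


if __name__ == "__main__":
    all_reduce_demo()
    worker((np.ones((2, 3), "float32"), np.zeros((2, 3), "float32")))

# Multi-machine use (sketch only): every node runs the same decorated function;
# the node hosting global rank 0 keeps rank_start=0 and creates the server,
# while the other nodes pass the same master_ip/port plus their first global rank,
# e.g. @dist.launcher(n_gpus=8, world_size=16, rank_start=8,
#                     master_ip="10.0.0.1", port=23456).
# When world_size > n_gpus, _run_wrapped inserts group_barrier() calls around func.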
imperative/python/test/integration/test_dp_correctness.py
@@ -6,7 +6,6 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import multiprocessing as mp
 import os
 import platform
 import re
imperative/python/test/integration/test_param_pack.py
@@ -6,7 +6,6 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import multiprocessing as mp
 import platform
 
 import numpy as np
imperative/python/test/unit/autodiff/test_grad_manger.py
@@ -17,7 +17,6 @@ import megengine.functional as F
 import megengine.module as M
 import megengine.optimizer as optim
 from megengine.autodiff import GradManager
-from megengine.core._imperative_rt.imperative import sync
 from megengine.distributed.helper import get_device_count_by_fork
 from megengine.jit import trace
@@ -135,6 +134,5 @@ def test_remote_grad():
     for func in train_funcs:
         for i in range(3):
             func(x)
-        sync()
 
     worker()
imperative/python/test/unit/core/test_autodiff.py
@@ -17,7 +17,6 @@ import megengine as mge
 import megengine.distributed as dist
 import megengine.functional as F
 from megengine.core._imperative_rt import TensorAttr, imperative
-from megengine.core._imperative_rt.imperative import sync
 from megengine.core.autodiff.grad import Grad
 from megengine.core.ops.builtin import Elemwise
 from megengine.core.tensor.raw_tensor import as_raw_tensor
@@ -65,47 +64,31 @@ def save_to(self, name="grad"):
 def test_dist_grad():
     world_size = 2
     x_np = np.random.rand(10).astype("float32")
-    server = dist.Server()
-    port = server.py_server_port
 
-    def worker0():
-        dist.init_process_group("localhost", port, world_size, 0, 0)
-        mge.device.set_default_device("gpu0")
-        grad = Grad()
-
-        x = as_tensor(x_np)
-        grad.wrt(x, callback=save_to(x))
-        # need a placeholder to trace operator
-        send_x = remote_send(x, 1)
-        recv_x = remote_recv(1, x_np.shape, x_np.dtype, "gpu0")
-        y = recv_x * recv_x
-
-        grad([y], [as_tensor(np.ones_like(x_np))])
-
-        np.testing.assert_almost_equal(x.grad.numpy(), x.numpy() * 2)
-
-    def worker1():
-        dist.init_process_group("localhost", port, world_size, 1, 1)
-        mge.device.set_default_device("gpu1")
-        grad = Grad()
-
-        recv_x = remote_recv(0, x_np.shape, x_np.dtype, "gpu1")
-        send_x = remote_send(recv_x, 0)
-
-        grad([], [])
-        # sync because grad has a send operator
-        sync()
-        send_x.device._cn._sync_all()
-
-    import multiprocessing as mp
-
-    p0 = mp.Process(target=worker0)
-    p1 = mp.Process(target=worker1)
-    p0.start()
-    p1.start()
-    p0.join(10)
-    p1.join(10)
-    assert p0.exitcode == 0 and p1.exitcode == 0
+    @dist.launcher
+    def worker():
+        rank = dist.get_rank()
+        if rank == 0:
+            grad = Grad()
+
+            x = as_tensor(x_np)
+            grad.wrt(x, callback=save_to(x))
+            # need a placeholder to trace operator
+            send_x = remote_send(x, 1)
+            recv_x = remote_recv(1, x_np.shape, x_np.dtype)
+            y = recv_x * recv_x
+
+            grad([y], [as_tensor(np.ones_like(x_np))])
+
+            np.testing.assert_almost_equal(x.grad.numpy(), x.numpy() * 2)
+        elif rank == 1:
+            grad = Grad()
+
+            recv_x = remote_recv(0, x_np.shape, x_np.dtype)
+            send_x = remote_send(recv_x, 0)
+
+            grad([], [])
+
+    worker()
 
 
 def test_grad():
imperative/python/test/unit/functional/test_functional_distributed.py
@@ -6,7 +6,6 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import multiprocessing as mp
 import platform
 
 import numpy as np
@@ -16,6 +15,7 @@ import megengine as mge
 import megengine.distributed as dist
 from megengine import Parameter, Tensor, tensor
 from megengine.device import get_default_device, set_default_device
+from megengine.distributed.helper import get_device_count_by_fork
 from megengine.functional.distributed import (
     all_gather,
     all_reduce_max,
@@ -38,20 +38,16 @@ from megengine.functional.distributed import (
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_reduce_sum():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = reduce_sum(inp)
         if rank == 0:
-            assert np.allclose(output.numpy(), expect)
+            assert np.allclose(output.numpy(), expect[rank])
         else:
             assert np.allclose(output.numpy(), 0)
@@ -59,16 +55,9 @@ def test_reduce_sum():
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = x + y
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, None, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, None)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -80,33 +69,22 @@ def test_reduce_sum():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_broadcast():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = broadcast(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = x + 1
-        p0 = mp.Process(target=worker, args=(0, x, x, port))
-        p1 = mp.Process(target=worker, args=(1, y, x, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (x, x)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -118,34 +96,23 @@ def test_broadcast():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_all_gather():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = all_gather(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = np.concatenate((x, y))
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, z, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, z)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -157,34 +124,23 @@ def test_all_gather():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_reduce_scatter_sum():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = reduce_scatter_sum(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = x + y
-        p0 = mp.Process(target=worker, args=(0, x, z[: shape[0] // 2], port))
-        p1 = mp.Process(target=worker, args=(1, y, z[shape[0] // 2 :], port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z[: shape[0] // 2], z[shape[0] // 2 :])
+        worker(data, expect)
 
     for shape in [(2, 4), (8, 10), (88, 44)]:
         check(shape)
@@ -196,34 +152,23 @@ def test_reduce_scatter_sum():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_all_reduce_sum():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = all_reduce_sum(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = x + y
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, z, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, z)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -235,34 +180,23 @@ def test_all_reduce_sum():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_all_reduce_max():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = all_reduce_max(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = np.maximum(x, y)
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, z, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, z)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -274,34 +208,23 @@ def test_all_reduce_max():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_all_reduce_min():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = all_reduce_min(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = np.minimum(x, y)
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, z, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, z)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -313,20 +236,16 @@ def test_all_reduce_min():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_gather():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = gather(inp)
         if rank == 0:
-            assert np.allclose(output.numpy(), expect)
+            assert np.allclose(output.numpy(), expect[rank])
         else:
             assert np.allclose(output.numpy(), 0)
@@ -334,16 +253,9 @@ def test_gather():
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         z = np.concatenate((x, y))
-        p0 = mp.Process(target=worker, args=(0, x, z, port))
-        p1 = mp.Process(target=worker, args=(1, y, None, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (z, None)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (99, 77)]:
         check(shape)
@@ -355,33 +267,22 @@ def test_gather():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_scatter():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
        output = scatter(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = x + 1
-        p0 = mp.Process(target=worker, args=(0, x, x[: shape[0] // 2], port))
-        p1 = mp.Process(target=worker, args=(1, y, x[shape[0] // 2 :], port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (x[: shape[0] // 2], x[shape[0] // 2 :])
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (100, 77)]:
         check(shape)
@@ -393,35 +294,24 @@ def test_scatter():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_all_to_all():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
-
-    def worker(rank, data, expect, port):
-        if mge.get_device_count("gpu") < world_size:
-            return
-        dist.init_process_group("localhost", port, world_size, rank, rank)
-        inp = tensor(data)
+    @dist.launcher(n_gpus=2)
+    def worker(data, expect):
+        rank = dist.get_rank()
+        inp = tensor(data[rank])
         output = all_to_all(inp)
-        assert np.allclose(output.numpy(), expect)
+        assert np.allclose(output.numpy(), expect[rank])
 
     def check(shape):
         x = np.random.rand(*shape).astype("float32")
         y = np.random.rand(*shape).astype("float32")
         a = np.concatenate((x[: shape[0] // 2], y[: shape[0] // 2]))
         b = np.concatenate((x[shape[0] // 2 :], y[shape[0] // 2 :]))
-        p0 = mp.Process(target=worker, args=(0, x, a, port))
-        p1 = mp.Process(target=worker, args=(1, y, b, port))
-        p0.start()
-        p1.start()
-        p0.join(10)
-        p1.join(10)
-        assert p0.exitcode == 0 and p1.exitcode == 0
+        data = (x, y)
+        expect = (a, b)
+        worker(data, expect)
 
     for shape in [(2, 3), (8, 10), (100, 77)]:
         check(shape)
@@ -433,33 +323,21 @@ def test_all_to_all():
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_io_remote():
-    world_size = 2
-    server = dist.Server()
-    port = server.py_server_port
     val = np.random.rand(4, 5).astype(np.float32)
 
-    def worker(rank):
-        if mge.get_device_count("gpu") < world_size:
-            return
+    @dist.launcher(n_gpus=2)
+    def worker():
+        rank = dist.get_rank()
         if rank == 0:  # remote send
-            dist.init_process_group("localhost", port, world_size, rank, rank)
             x = Tensor(val, device="gpu0")
             y = remote_send(x, 1)
             assert y.numpy()[0] == 0
         else:  # remote recv
-            dist.init_process_group("localhost", port, world_size, rank, rank)
             y = remote_recv(0, val.shape, val.dtype)
             assert y.device == "gpu1"
             np.testing.assert_almost_equal(val, y.numpy())
 
-    procs = []
-    for rank in range(world_size):
-        p = mp.Process(target=worker, args=(rank,))
-        p.start()
-        procs.append(p)
-
-    for p in procs:
-        p.join(10)
-        assert p.exitcode == 0
+    worker()
imperative/python/test/unit/module/test_batchnorm.py
@@ -7,7 +7,6 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import functools
-import multiprocessing as mp
 import platform
 
 import numpy as np
@@ -17,6 +16,7 @@ import megengine as mge
 import megengine.distributed as dist
 from megengine import Tensor
 from megengine.core._trace_option import use_symbolic_shape
+from megengine.distributed.helper import get_device_count_by_fork
 from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
 
 _assert_allclose = functools.partial(np.testing.assert_allclose, atol=5e-6, rtol=5e-6)
@@ -28,6 +28,7 @@ _assert_allclose = functools.partial(np.testing.assert_allclose, atol=5e-6, rtol
 @pytest.mark.skipif(
     platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
 )
+@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
 @pytest.mark.isolated_distributed
 def test_syncbn():
     nr_chan = 8
@@ -41,15 +42,14 @@ def test_syncbn():
     server = dist.Server()
     port = server.py_server_port
 
-    def worker(rank, data, yv_expect, running_mean, running_var):
-        if mge.get_device_count("gpu") < nr_ranks:
-            return
-        dist.init_process_group("localhost", port, nr_ranks, rank, rank)
+    @dist.launcher(n_gpus=2)
+    def worker(data, yv_expect, running_mean, running_var):
+        rank = dist.get_rank()
         bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
         for i in range(steps):
-            yv = bn(Tensor(data[i]))
-            _assert_allclose(yv.numpy(), yv_expect)
+            yv = bn(Tensor(data[rank][i]))
+            _assert_allclose(yv.numpy(), yv_expect[rank])
         _assert_allclose(bn.running_mean.numpy(), running_mean)
         _assert_allclose(bn.running_var.numpy(), running_var)
@@ -77,24 +77,9 @@ def test_syncbn():
         for j in range(steps):
             data[i].append(xv[j][:, :, :, i * 8 : i * 8 + 8])
 
-    procs = []
-    for rank in range(nr_ranks):
-        p = mp.Process(
-            target=worker,
-            args=(
-                rank,
-                data[rank],
-                yv_expect[:, :, :, rank * 8 : rank * 8 + 8],
-                running_mean,
-                running_var,
-            ),
-        )
-        p.start()
-        procs.append(p)
-
-    for p in procs:
-        p.join(10)
-        assert p.exitcode == 0
+    yv_expect = [yv_expect[:, :, :, i * 8 : i * 8 + 8] for i in range(nr_ranks)]
+
+    worker(data, yv_expect, running_mean, running_var)
 
 
 def test_batchnorm():
imperative/python/test/unit/quantization/test_observer.py
-import multiprocessing as mp
 import platform
 
 import numpy as np