MegEngine 天元 / MegEngine

Commit 207a3463
Authored Jan 19, 2021 by Megvii Engine Team
Parent: 869a0327

chore(mge): run get_device_count("gpu") in subprocess

GitOrigin-RevId: 0f0dc001cfc45fc0d04de1a86c27f8bba8185d6b
Showing 19 changed files with 119 additions and 78 deletions (+119, -78).
imperative/python/megengine/distributed/helper.py                      +0   -18
imperative/python/megengine/distributed/launcher.py                    +3   -4
imperative/python/megengine/functional/tensor.py                       +2   -2
imperative/python/test/conftest.py                                     +2   -2
imperative/python/test/integration/test_param_pack.py                  +0   -1
imperative/python/test/unit/autodiff/test_grad_manger.py               +0   -1
imperative/python/test/unit/core/test_autodiff.py                      +0   -1
imperative/python/test/unit/core/test_dtype_quant.py                   +2   -3
imperative/python/test/unit/distributed/test_distributed.py            +1   -5
imperative/python/test/unit/functional/test_functional.py              +3   -6
imperative/python/test/unit/functional/test_functional_distributed.py  +0   -1
imperative/python/test/unit/functional/test_tensor.py                  +0   -1
imperative/python/test/unit/module/test_batchnorm.py                   +0   -1
imperative/python/test/unit/module/test_qat.py                         +2   -4
imperative/python/test/unit/quantization/test_observer.py              +3   -3
imperative/python/test/unit/quantization/test_op.py                    +2   -2
imperative/python/test/unit/random/test_rng.py                         +13  -13
imperative/python/test/unit/utils/test_network_node.py                 +2   -3
src/core/impl/comp_node/cuda/comp_node.cpp                             +84  -7
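For context: querying the CUDA device count initializes the CUDA driver in the calling process, and a CUDA context does not survive fork() (see the Stack Overflow link in the removed docstring below). Until now the Python layer worked around this with get_device_count_by_fork, which ran the probe in a throwaway child process; this commit moves the probe into the C++ implementation of get_device_count and deletes the Python helper. A minimal sketch of the probe-in-a-child idea, essentially what the removed helper did, using the public megengine.device.get_device_count for illustration:

    import multiprocessing as mp

    from megengine.device import get_device_count

    def _probe(queue, device_type):
        # CUDA is initialized only inside this short-lived child,
        # so the parent process stays fork-safe.
        queue.put(get_device_count(device_type))

    def device_count_in_subprocess(device_type="gpu"):
        queue = mp.Queue()
        child = mp.Process(target=_probe, args=(queue, device_type))
        child.start()
        child.join()
        return queue.get()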
imperative/python/megengine/distributed/helper.py

@@ -181,11 +181,6 @@ def synchronized(func: Callable):
     return wrapper
 
-def _get_device_count_worker(queue, device_type):
-    num = get_device_count(device_type)
-    queue.put(num)
-
-
 def _check_device_initialized(device_type: str, rank: int):
     try:
         test = Tensor(1, device=(device_type + str(rank)))
@@ -198,19 +193,6 @@ def _check_device_initialized(device_type: str, rank: int):
         raise RuntimeError(errmsg)
 
-def get_device_count_by_fork(device_type: str):
-    """
-    Get device count in fork thread.
-    See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork
-    for more information.
-    """
-    q = mp.Queue()
-    p = mp.Process(target=_get_device_count_worker, args=(q, device_type))
-    p.start()
-    p.join()
-    return q.get()
-
-
 def bcast_list_(inps: list, group: Group = WORLD):
     """
     Broadcast tensors between given group.
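With the helper gone, every call site in this commit migrates the same way; the pattern, extracted from the diffs below:

    # before this commit
    from megengine.distributed.helper import get_device_count_by_fork
    n_gpus = get_device_count_by_fork("gpu")

    # after this commit
    from megengine.device import get_device_count
    n_gpus = get_device_count("gpu")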
imperative/python/megengine/distributed/launcher.py

@@ -13,9 +13,10 @@ import queue
 from .. import _exit
 from ..core._imperative_rt.core2 import full_sync
+from ..device import get_device_count
 from ..logger import get_logger
 from .group import _set_machine_ranks, group_barrier, init_process_group
-from .helper import _check_device_initialized, get_device_count_by_fork
+from .helper import _check_device_initialized
 from .server import Client, Server
 
 WARN_SUBPROCESS_EXIT_WITHOUT_RETURN = (
@@ -91,9 +92,7 @@ class launcher:
         backend="auto",
     ):
         self.func = func
-        self.n_gpus = (
-            n_gpus if n_gpus is not None else get_device_count_by_fork(device_type)
-        )
+        self.n_gpus = n_gpus if n_gpus is not None else get_device_count(device_type)
         self.world_size = world_size if world_size is not None else self.n_gpus
         self.rank_start = rank_start
         self.master_ip = master_ip
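Since the n_gpus fallback now goes through the fork-safe get_device_count, decorating a function with launcher no longer depends on a Python-side helper subprocess. A minimal usage sketch (assuming a CUDA build with at least one visible GPU; the worker body is illustrative):

    import megengine.distributed as dist

    @dist.launcher  # n_gpus omitted: falls back to get_device_count(device_type)
    def main():
        print("rank", dist.get_rank(), "of", dist.get_world_size())

    if __name__ == "__main__":
        main()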
imperative/python/megengine/functional/tensor.py

@@ -1188,11 +1188,11 @@ def copy(inp, device=None):
         import numpy as np
         import platform
         from megengine import tensor
-        from megengine.distributed.helper import get_device_count_by_fork
+        from megengine.device import get_device_count
         import megengine.functional as F
 
         x = tensor([1, 2, 3], np.int32)
-        if 1 == get_device_count_by_fork("gpu"):
+        if 1 == get_device_count("gpu"):
             y = F.copy(x, "cpu1")
             print(y.numpy())
         else:
imperative/python/test/conftest.py

@@ -15,7 +15,7 @@ import megengine.functional
 import megengine.module
 from megengine import Parameter
 from megengine.core._imperative_rt.core2 import sync
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.experimental.autograd import (
     disable_higher_order_directive,
     enable_higher_order_directive,
@@ -25,7 +25,7 @@ from megengine.module import Linear, Module
 sys.path.append(os.path.join(os.path.dirname(__file__), "helpers"))
 
-_ngpu = get_device_count_by_fork("gpu")
+_ngpu = get_device_count("gpu")
 
 @pytest.fixture(autouse=True)
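conftest.py is imported by the main pytest process at collection time, so the module-level _ngpu probe is exactly the kind of call that must not initialize CUDA before tests fork their own workers; after this commit it is safe because get_device_count performs the probe in a forked child itself. A hedged sketch of how such a collection-time count is typically consumed (the test name is illustrative, not from this repository):

    import pytest

    from megengine.device import get_device_count

    _ngpu = get_device_count("gpu")  # evaluated once, at collection time

    @pytest.mark.skipif(_ngpu < 2, reason="requires at least 2 GPUs")
    def test_needs_two_gpus():
        ...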
imperative/python/test/integration/test_param_pack.py

@@ -16,7 +16,6 @@ import megengine.autodiff as ad
 import megengine.distributed as dist
 import megengine.optimizer as optimizer
 from megengine import Parameter, tensor
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.module import Module
 from megengine.optimizer import SGD
imperative/python/test/unit/autodiff/test_grad_manger.py

@@ -18,7 +18,6 @@ import megengine.functional as F
 import megengine.module as M
 import megengine.optimizer as optim
 from megengine.autodiff import GradManager
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.jit import trace
imperative/python/test/unit/core/test_autodiff.py

@@ -20,7 +20,6 @@ from megengine.core._imperative_rt import CompNode, TensorAttr, imperative
 from megengine.core._imperative_rt.core2 import TensorWeakRef, apply, sync
 from megengine.core.autodiff.grad import Grad
 from megengine.core.ops.builtin import Elemwise, Identity
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.functional.distributed import remote_recv, remote_send
imperative/python/test/unit/core/test_dtype_quant.py

@@ -31,7 +31,7 @@ from megengine.core.tensor.dtype import (
     quint4,
     quint8,
 )
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.tensor import Tensor
@@ -184,8 +184,7 @@ def test_dtype_int4_ffi_handle():
 @pytest.mark.skipif(
-    get_device_count_by_fork("gpu") != 0,
+    get_device_count("gpu") != 0,
     reason="TypeCvt to quint4 is not supported on GPU",
 )
 def test_quint4_typecvt():
     device = "xpux"
imperative/python/test/unit/distributed/test_distributed.py

@@ -17,11 +17,7 @@ import megengine as mge
 import megengine.distributed as dist
 from megengine.core.ops.builtin import CollectiveComm, ParamPackConcat, ParamPackSplit
 from megengine.device import get_default_device
-from megengine.distributed.helper import (
-    get_device_count_by_fork,
-    param_pack_concat,
-    param_pack_split,
-)
+from megengine.distributed.helper import param_pack_concat, param_pack_split
 
 def _assert_q_empty(q):
imperative/python/test/unit/functional/test_functional.py

@@ -22,8 +22,7 @@ from megengine import Parameter, Tensor, is_cuda_available, tensor
 from megengine.core._trace_option import use_symbolic_shape
 from megengine.core.autodiff.grad import Grad
 from megengine.core.tensor.utils import make_shape_tuple
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.jit import trace
 
 def test_where():
@@ -613,7 +612,7 @@ def test_nms():
 @pytest.mark.skipif(
-    get_device_count_by_fork("gpu") > 0, reason="cuda does not support nchw int8"
+    get_device_count("gpu") > 0, reason="cuda does not support nchw int8"
 )
 def test_conv_bias():
     inp_scale = 1.5
@@ -715,9 +714,7 @@ def test_conv_bias():
     run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")
 
-@pytest.mark.skipif(
-    get_device_count_by_fork("gpu") > 0, reason="no int8 algorithm on cuda"
-)
+@pytest.mark.skipif(get_device_count("gpu") > 0, reason="no int8 algorithm on cuda")
 def test_batch_conv_bias():
     inp_scale = 1.5
     w_scale = 2.5
imperative/python/test/unit/functional/test_functional_distributed.py

@@ -16,7 +16,6 @@ import megengine.distributed as dist
 from megengine import Parameter, tensor
 from megengine.core._imperative_rt.core2 import sync
 from megengine.device import get_default_device, set_default_device
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.functional.distributed import (
     all_gather,
     all_reduce_max,
imperative/python/test/unit/functional/test_tensor.py

@@ -18,7 +18,6 @@ from megengine import tensor
 from megengine.core._trace_option import use_symbolic_shape
 from megengine.core.tensor import megbrain_graph as G
 from megengine.core.tensor.utils import astensor1d
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.jit import trace
 from megengine.utils.network import Network, set_symbolic_shape
 from megengine.utils.network_node import VarNode
imperative/python/test/unit/module/test_batchnorm.py

@@ -16,7 +16,6 @@ import megengine as mge
 import megengine.distributed as dist
 from megengine import Tensor
 from megengine.core._trace_option import use_symbolic_shape
-from megengine.distributed.helper import get_device_count_by_fork
 from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
 
 _assert_allclose = functools.partial(np.testing.assert_allclose, atol=5e-6, rtol=5e-6)
imperative/python/test/unit/module/test_qat.py

@@ -6,7 +6,7 @@ import pytest
 import megengine.utils.comp_graph_tools as cgtools
 from megengine import jit, tensor
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.functional import expand_dims
 from megengine.module import (
     BatchMatMulActivation,
@@ -101,9 +101,7 @@ def test_qat_conv():
     np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())
 
-@pytest.mark.skipif(
-    get_device_count_by_fork("gpu") > 0, reason="no int8 algorithm on cuda"
-)
+@pytest.mark.skipif(get_device_count("gpu") > 0, reason="no int8 algorithm on cuda")
 def test_qat_batchmatmul_activation():
     batch = 4
     in_features = 8
imperative/python/test/unit/quantization/test_observer.py

@@ -13,7 +13,7 @@ import pytest
 import megengine as mge
 import megengine.distributed as dist
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.quantization import QuantMode, create_qparams
 from megengine.quantization.observer import (
     ExponentialMovingAverageObserver,
@@ -78,7 +78,7 @@ def test_passive_observer():
 @pytest.mark.require_ngpu(2)
 @pytest.mark.isolated_distributed
 def test_sync_min_max_observer():
-    word_size = get_device_count_by_fork("gpu")
+    word_size = get_device_count("gpu")
     x = np.random.rand(3 * word_size, 3, 3, 3).astype("float32")
     np_min, np_max = x.min(), x.max()
@@ -96,7 +96,7 @@ def test_sync_min_max_observer():
 @pytest.mark.require_ngpu(2)
 @pytest.mark.isolated_distributed
 def test_sync_exponential_moving_average_observer():
-    word_size = get_device_count_by_fork("gpu")
+    word_size = get_device_count("gpu")
     t = np.random.rand()
     x1 = np.random.rand(3 * word_size, 3, 3, 3).astype("float32")
     x2 = np.random.rand(3 * word_size, 3, 3, 3).astype("float32")
imperative/python/test/unit/quantization/test_op.py

@@ -12,7 +12,7 @@ import pytest
 import megengine as mge
 import megengine.functional as F
 from megengine.core.tensor import dtype
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.functional.elemwise import _elemwise_multi_type, _elwise
 from megengine.quantization import QuantMode, create_qparams
@@ -68,7 +68,7 @@ def test_elemwise(kind):
 @pytest.mark.skipif(
-    get_device_count_by_fork("gpu") > 0, reason="cuda does not support nchw int8"
+    get_device_count("gpu") > 0, reason="cuda does not support nchw int8"
 )
 def test_conv_bias():
     inp_scale = np.float32(np.random.rand() + 1)
imperative/python/test/unit/random/test_rng.py

@@ -26,12 +26,12 @@ from megengine.core.ops.builtin import (
     PoissonRNG,
     UniformRNG,
 )
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count
 from megengine.random import RNG, seed, uniform
 
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_gaussian_op():
     shape = (
@@ -61,7 +61,7 @@ def test_gaussian_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_uniform_op():
     shape = (
@@ -89,7 +89,7 @@ def test_uniform_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_gamma_op():
     _shape, _scale = 2, 0.8
@@ -117,7 +117,7 @@ def test_gamma_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_beta_op():
     _alpha, _beta = 2, 0.8
@@ -148,7 +148,7 @@ def test_beta_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_poisson_op():
     lam = F.full([8, 9, 11, 12], value=2, dtype="float32")
@@ -171,7 +171,7 @@ def test_poisson_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 2, reason="xpu counts need > 2",
+    get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_permutation_op():
     n = 1000
@@ -205,7 +205,7 @@ def test_permutation_op():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_UniformRNG():
     m1 = RNG(seed=111, device="xpu0")
@@ -233,7 +233,7 @@ def test_UniformRNG():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_NormalRNG():
     m1 = RNG(seed=111, device="xpu0")
@@ -262,7 +262,7 @@ def test_NormalRNG():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_GammaRNG():
     m1 = RNG(seed=111, device="xpu0")
@@ -295,7 +295,7 @@ def test_GammaRNG():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_BetaRNG():
     m1 = RNG(seed=111, device="xpu0")
@@ -330,7 +330,7 @@ def test_BetaRNG():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_PoissonRNG():
     m1 = RNG(seed=111, device="xpu0")
@@ -359,7 +359,7 @@ def test_PoissonRNG():
 @pytest.mark.skipif(
-    get_device_count_by_fork("xpu") <= 1, reason="xpu counts need > 1",
+    get_device_count("xpu") <= 1, reason="xpu counts need > 1",
 )
 def test_PermutationRNG():
     m1 = RNG(seed=111, device="xpu0")
imperative/python/test/unit/utils/test_network_node.py

@@ -13,8 +13,7 @@ import megengine.random as rand
 from megengine.core._imperative_rt.core2 import apply
 from megengine.core._wrap import Device
 from megengine.core.ops import builtin
-from megengine.device import is_cuda_available
-from megengine.distributed.helper import get_device_count_by_fork
+from megengine.device import get_device_count, is_cuda_available
 from megengine.functional.external import tensorrt_runtime_opr
 from megengine.jit.tracing import trace
 from megengine.tensor import Tensor
@@ -273,7 +272,7 @@ def test_deformable_ps_roi_pooling():
 @pytest.mark.skipif(
-    get_device_count_by_fork("gpu") > 0,
+    get_device_count("gpu") > 0,
     reason="does not support int8 when gpu compute capability less than 6.1",
 )
 def test_convbias():
src/core/impl/comp_node/cuda/comp_node.cpp

@@ -27,8 +27,14 @@ using namespace mgb;
 #include <thread>
 
+#include <cuda.h>
 #include <cuda_runtime.h>
 
+#ifdef __unix__
+#include <unistd.h>
+#include <sys/wait.h>
+#endif
+
 using CudaCompNodeImpl = CudaCompNode::CompNodeImpl;
 
 namespace {
@@ -700,19 +706,90 @@ void CudaCompNode::EventImpl::do_device_wait_by(Impl* cn_impl) {
 /* ===================== CudaCompNode static methods ===================== */
 
+namespace {
+
+#ifndef __unix__
+CUresult get_device_count_forksafe(int* pcnt) {
+    cuInit(0);
+    return cuDeviceGetCount(pcnt);
+}
+#else
+struct RAIICloseFD : NonCopyableObj {
+    int m_fd = -1;
+
+    RAIICloseFD(int fd) : m_fd(fd) {}
+    ~RAIICloseFD() { close(); }
+    void close() {
+        if (m_fd != -1) {
+            ::close(m_fd);
+            m_fd = -1;
+        }
+    }
+};
+// an implementation that does not call cuInit
+CUresult get_device_count_forksafe(int* pcnt) {
+    auto err = cuDeviceGetCount(pcnt);
+    if (err != CUDA_ERROR_NOT_INITIALIZED) return err;
+    // cuInit not called, call it in child process
+    int fd[2];
+    mgb_assert(pipe(fd) == 0, "pipe() failed");
+    int fdr = fd[0], fdw = fd[1];
+    RAIICloseFD fdr_guard(fdr);
+    RAIICloseFD fdw_guard(fdw);
+    auto cpid = fork();
+    mgb_assert(cpid != -1, "fork() failed");
+    if (cpid == 0) {
+        fdr_guard.close();
+        do {
+            err = cuInit(0);
+            if (err != CUDA_SUCCESS) break;
+            err = cuDeviceGetCount(pcnt);
+        } while (0);
+        auto sz = write(fdw, &err, sizeof(err));
+        if (sz == sizeof(err) && err == CUDA_SUCCESS) {
+            sz = write(fdw, pcnt, sizeof(*pcnt));
+        }
+        fdw_guard.close();
+        std::quick_exit(0);
+    }
+    fdw_guard.close();
+    auto sz = read(fdr, &err, sizeof(err));
+    mgb_assert(sz == sizeof(err), "failed to read error code from child");
+    if (err == CUDA_SUCCESS) {
+        sz = read(fdr, pcnt, sizeof(*pcnt));
+        mgb_assert(sz == sizeof(*pcnt), "failed to read device count from child");
+        return err;
+    }
+    // try again, maybe another thread called cuInit while we fork
+    auto err2 = cuDeviceGetCount(pcnt);
+    if (err2 == CUDA_SUCCESS) return err2;
+    if (err2 == CUDA_ERROR_NOT_INITIALIZED) return err;
+    return err2;
+}
+#endif
+
+const char* cu_get_error_string(CUresult err) {
+    const char* ret = nullptr;
+    cuGetErrorString(err, &ret);
+    if (!ret) ret = "unknown cuda error";
+    return ret;
+}
+
+}  // namespace
+
 bool CudaCompNode::available() {
     static int result = -1;
     static Spinlock mtx;
     MGB_LOCK_GUARD(mtx);
     if (result == -1) {
         int ndev = -1;
-        auto err = cudaGetDeviceCount(&ndev);
-        result = err == cudaSuccess && ndev > 0;
+        auto err = get_device_count_forksafe(&ndev);
+        result = err == CUDA_SUCCESS && ndev > 0;
         if (!result) {
             mgb_log_warn("cuda unavailable: %s(%d) ndev=%d",
-                    cudaGetErrorString(err), static_cast<int>(err), ndev);
+                    cu_get_error_string(err), static_cast<int>(err), ndev);
         }
-        if (err == cudaErrorInitializationError) {
+        if (err == CUDA_ERROR_NOT_INITIALIZED) {
             mgb_throw(std::runtime_error, "cuda initialization error.");
         }
     }
@@ -857,11 +934,11 @@ size_t CudaCompNode::get_device_count(bool warn) {
     static Spinlock mtx;
     MGB_LOCK_GUARD(mtx);
     if (cnt == -1) {
-        auto err = cudaGetDeviceCount(&cnt);
-        if (err != cudaSuccess) {
+        auto err = get_device_count_forksafe(&cnt);
+        if (err != CUDA_SUCCESS) {
             if (warn)
                 mgb_log_error("cudaGetDeviceCount failed: %s (err %d)",
-                        cudaGetErrorString(err), int(err));
+                        cu_get_error_string(err), int(err));
             cnt = 0;
        }
        mgb_assert(cnt >= 0);
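Two design points in the hunk above: the child exits with std::quick_exit(0) so it skips atexit handlers and static destructors inherited from the parent, and the parent retries cuDeviceGetCount after a child-side failure in case another thread called cuInit concurrently (the in-source comment says as much). For readers who want the control flow without the CUDA driver API, here is a Python rendering of the same fork-and-pipe protocol; `probe` stands in for the cuInit + cuDeviceGetCount sequence and is an assumption for illustration, not MegEngine API:

    import os
    import struct

    def device_count_forksafe(probe):
        # Run probe() (which may initialize CUDA) in a forked child and
        # ship (err, count) back through a pipe, mirroring the C++ code.
        rfd, wfd = os.pipe()
        pid = os.fork()
        if pid == 0:  # child: keep only the write end open
            os.close(rfd)
            try:
                err, cnt = 0, probe()
            except Exception:
                err, cnt = 1, -1
            os.write(wfd, struct.pack("ii", err, cnt))
            os.close(wfd)
            os._exit(0)  # like std::quick_exit: no inherited cleanup runs
        os.close(wfd)
        data = os.read(rfd, struct.calcsize("ii"))
        os.close(rfd)
        os.waitpid(pid, 0)
        err, cnt = struct.unpack("ii", data)
        if err:
            raise RuntimeError("device probe failed in child process")
        return cnt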