Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
597a1e79
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
597a1e79
编写于
4月 26, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(imperative): add interface to clear algorithm cache
GitOrigin-RevId: 662618954bc5dee254f294f4ce8b2e4efb95d87b
上级
e2f5156b
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
50 addition
and
343 deletion
+50
-343
dnn/include/megdnn/algorithm_cache.h
dnn/include/megdnn/algorithm_cache.h
+1
-1
imperative/python/megengine/core/_config.py
imperative/python/megengine/core/_config.py
+4
-1
imperative/python/megengine/functional/debug_param.py
imperative/python/megengine/functional/debug_param.py
+15
-13
imperative/python/src/tensor.cpp
imperative/python/src/tensor.cpp
+3
-0
imperative/python/test/integration/test_correctness_mnistnet.py
...tive/python/test/integration/test_correctness_mnistnet.py
+0
-289
imperative/python/test/integration/test_dp_correctness.py
imperative/python/test/integration/test_dp_correctness.py
+3
-6
imperative/python/test/unit/module/test_conv.py
imperative/python/test/unit/module/test_conv.py
+6
-15
imperative/python/test/unit/quantization/test_op.py
imperative/python/test/unit/quantization/test_op.py
+1
-2
imperative/python/test/unit/utils/test_network_node.py
imperative/python/test/unit/utils/test_network_node.py
+17
-16
未找到文件。
dnn/include/megdnn/algorithm_cache.h
浏览文件 @
597a1e79
...
...
@@ -71,7 +71,7 @@ public:
MGE_WIN_DECLSPEC_FUC
Result
get
(
const
Key
&
key
);
void
clear
();
MGE_WIN_DECLSPEC_FUC
void
clear
();
private:
struct
Hash
{
...
...
imperative/python/megengine/core/_config.py
浏览文件 @
597a1e79
...
...
@@ -9,7 +9,7 @@
import
os
from
contextlib
import
contextmanager
from
._imperative_rt.core2
import
get_option
,
set_option
from
._imperative_rt.core2
import
_clear_algorithm_cache
,
get_option
,
set_option
__compute_mode
=
"default"
__conv_format
=
"default"
...
...
@@ -44,6 +44,9 @@ def benchmark_kernel(mod):
@
benchmark_kernel
.
setter
def
benchmark_kernel
(
mod
,
option
:
bool
):
global
_benchmark_kernel
# try different strategy, then clear algorithm cache
if
option
!=
_benchmark_kernel
:
_clear_algorithm_cache
()
_benchmark_kernel
=
option
...
...
imperative/python/megengine/functional/debug_param.py
浏览文件 @
597a1e79
...
...
@@ -9,6 +9,7 @@
import
os
from
..core
import
_config
from
..core._imperative_rt.core2
import
_clear_algorithm_cache
from
..core.ops
import
builtin
from
..logger
import
get_logger
from
..utils.deprecation
import
deprecated
...
...
@@ -52,7 +53,6 @@ def set_execution_strategy(option):
* "HEURISTIC": uses heuristic to choose the fastest algorithm.
* "PROFILE": runs possible algorithms on a real device to find the best one.
* "REPRODUCIBLE": uses algorithms that are reproducible.
* "OPTIMIZED": uses algorithms that are optimized.
The default strategy is "HEURISTIC", these options can be combined to
form a combination option, e.g. PROFILE_REPRODUCIBLE is a combination
...
...
@@ -70,22 +70,25 @@ def set_execution_strategy(option):
It can also be set through the environment variable ``MEGENGINE_EXECUTION_STRATEGY``.
"""
_benchmark_kernel
=
False
_deterministic_kernel
=
False
if
isinstance
(
option
,
Strategy
):
_
config
.
_
benchmark_kernel
=
(
_benchmark_kernel
=
(
True
if
option
&
_valid_string_option
[
"PROFILE"
]
!=
Strategy
(
0
)
else
False
)
_
config
.
_
deterministic_kernel
=
(
_deterministic_kernel
=
(
True
if
option
&
_valid_string_option
[
"REPRODUCIBLE"
]
!=
Strategy
(
0
)
else
False
)
if
_benchmark_kernel
!=
_config
.
_benchmark_kernel
:
_clear_algorithm_cache
()
_config
.
_benchmark_kernel
=
_benchmark_kernel
_config
.
_deterministic_kernel
=
_deterministic_kernel
return
assert
isinstance
(
option
,
str
)
_config
.
_benchmark_kernel
=
False
_config
.
_deterministic_kernel
=
False
for
opt
in
option
.
split
(
"_"
):
if
not
opt
in
_valid_string_option
:
raise
ValueError
(
...
...
@@ -93,10 +96,12 @@ def set_execution_strategy(option):
_valid_string_option
.
keys
()
)
)
_config
.
_benchmark_kernel
|=
_valid_string_option
[
opt
]
==
Strategy
.
PROFILE
_config
.
_deterministic_kernel
|=
(
_valid_string_option
[
opt
]
==
Strategy
.
REPRODUCIBLE
)
_benchmark_kernel
|=
_valid_string_option
[
opt
]
==
Strategy
.
PROFILE
_deterministic_kernel
|=
_valid_string_option
[
opt
]
==
Strategy
.
REPRODUCIBLE
if
_benchmark_kernel
!=
_config
.
_benchmark_kernel
:
_clear_algorithm_cache
()
_config
.
_benchmark_kernel
=
_benchmark_kernel
_config
.
_deterministic_kernel
=
_deterministic_kernel
@
deprecated
(
version
=
"1.3"
,
reason
=
"use get_execution_strategy() instead"
)
...
...
@@ -107,6 +112,3 @@ def get_conv_execution_strategy() -> str:
@
deprecated
(
version
=
"1.3"
,
reason
=
"use set_execution_strategy() instead"
)
def
set_conv_execution_strategy
(
option
:
str
):
return
set_execution_strategy
(
option
)
set_execution_strategy
(
os
.
getenv
(
"MEGENGINE_EXECUTION_STRATEGY"
,
"HEURISTIC"
))
imperative/python/src/tensor.cpp
浏览文件 @
597a1e79
...
...
@@ -26,6 +26,7 @@
#include "megbrain/opr/io.h"
#include "megbrain/plugin/profiler.h"
#include "megbrain/utils/stats.h"
#include "megdnn/algorithm_cache.h"
#include "./common.h"
#include "./grad.h"
...
...
@@ -1428,6 +1429,8 @@ void init_tensor(py::module m) {
return
set_amp_prec_dtype
(
false
,
dtype_name
);
});
m
.
def
(
"_clear_algorithm_cache"
,
[]
{
megdnn
::
AlgorithmCache
::
instance
().
clear
();
});
py
::
register_exception
<
TraceError
>
(
m
,
"TraceError"
);
}
...
...
imperative/python/test/integration/test_correctness_mnistnet.py
已删除
100644 → 0
浏览文件 @
e2f5156b
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
os
import
re
import
subprocess
import
sys
import
numpy
as
np
import
pytest
import
megengine
as
mge
import
megengine.autodiff
as
ad
import
megengine.functional
as
F
from
megengine
import
jit
from
megengine.core._trace_option
import
set_symbolic_shape
from
megengine.core.ops
import
builtin
from
megengine.core.tensor.utils
import
make_shape_tuple
from
megengine.functional.debug_param
import
set_execution_strategy
from
megengine.jit
import
SublinearMemoryConfig
from
megengine.module
import
(
AdaptiveAvgPool2d
,
AvgPool2d
,
BatchNorm2d
,
Conv2d
,
Linear
,
Module
,
)
from
megengine.optimizer
import
SGD
from
megengine.tensor
import
Tensor
Strategy
=
builtin
.
ops
.
Convolution
.
Strategy
def
get_gpu_name
():
try
:
gpu_info
=
subprocess
.
check_output
(
[
"nvidia-smi"
,
"--query-gpu=gpu_name"
,
"--format=csv,noheader"
]
)
gpu_info
=
gpu_info
.
decode
(
"ascii"
).
split
(
"
\n
"
)[
0
]
except
:
gpu_info
=
"None"
return
gpu_info
def
get_cpu_name
():
cpu_info
=
"None"
try
:
cpu_info
=
subprocess
.
check_output
([
"cat"
,
"/proc/cpuinfo"
]).
decode
(
"ascii"
)
for
line
in
cpu_info
.
split
(
"
\n
"
):
if
"model name"
in
line
:
return
re
.
sub
(
".*model name.*:"
,
""
,
line
,
1
).
strip
()
except
:
pass
return
cpu_info
def
get_xpu_name
():
if
mge
.
is_cuda_available
():
return
get_gpu_name
()
else
:
return
get_cpu_name
()
class
MnistNet
(
Module
):
def
__init__
(
self
,
has_bn
=
False
,
use_adaptive_pooling
=
False
):
super
().
__init__
()
self
.
conv0
=
Conv2d
(
1
,
20
,
kernel_size
=
5
,
bias
=
True
)
if
use_adaptive_pooling
:
self
.
pool0
=
AdaptiveAvgPool2d
(
12
)
else
:
self
.
pool0
=
AvgPool2d
(
2
)
self
.
conv1
=
Conv2d
(
20
,
20
,
kernel_size
=
5
,
bias
=
True
)
self
.
pool1
=
AvgPool2d
(
2
)
self
.
fc0
=
Linear
(
20
*
4
*
4
,
500
,
bias
=
True
)
self
.
fc1
=
Linear
(
500
,
10
,
bias
=
True
)
self
.
bn0
=
None
self
.
bn1
=
None
if
has_bn
:
self
.
bn0
=
BatchNorm2d
(
20
)
self
.
bn1
=
BatchNorm2d
(
20
)
def
forward
(
self
,
x
):
x
=
self
.
conv0
(
x
)
if
self
.
bn0
:
x
=
self
.
bn0
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
pool0
(
x
)
x
=
self
.
conv1
(
x
)
if
self
.
bn1
:
x
=
self
.
bn1
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
pool1
(
x
)
x
=
F
.
flatten
(
x
,
1
)
x
=
self
.
fc0
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
fc1
(
x
)
return
x
def
train
(
data
,
label
,
net
,
opt
,
gm
):
with
gm
:
pred
=
net
(
data
)
loss
=
F
.
nn
.
cross_entropy
(
pred
,
label
)
gm
.
backward
(
loss
)
return
loss
def
update_model
(
model_path
):
"""
Update the dumped model with test cases for new reference values.
The model with pre-trained weights is trained for one iter with the test data attached.
The loss and updated net state dict is dumped.
.. code-block:: python
from test_correctness import update_model
update_model('mnist_model_with_test.mge') # for gpu
update_model('mnist_model_with_test_cpu.mge') # for cpu
"""
net
=
MnistNet
(
has_bn
=
True
)
checkpoint
=
mge
.
load
(
model_path
)
net
.
load_state_dict
(
checkpoint
[
"net_init"
])
lr
=
checkpoint
[
"sgd_lr"
]
opt
=
SGD
(
net
.
parameters
(),
lr
=
lr
)
gm
=
ad
.
GradManager
().
attach
(
net
.
parameters
())
data
=
Tensor
(
checkpoint
[
"data"
],
dtype
=
np
.
float32
)
label
=
Tensor
(
checkpoint
[
"label"
],
dtype
=
np
.
int32
)
opt
.
clear_grad
()
loss
=
train
(
data
,
label
,
net
,
opt
,
gm
)
opt
.
step
()
xpu_name
=
get_xpu_name
()
checkpoint
.
update
(
{
"net_updated"
:
net
.
state_dict
(),
"loss"
:
loss
.
numpy
(),
"xpu"
:
xpu_name
}
)
mge
.
save
(
checkpoint
,
model_path
)
def
run_train
(
model_path
,
use_jit
,
use_symbolic
,
sublinear_memory_config
=
None
,
max_err
=
None
,
use_adaptive_pooling
=
False
,
):
"""
Load the model with test cases and run the training for one iter.
The loss and updated weights are compared with reference value to verify the correctness.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Please think twice before you do so.
"""
net
=
MnistNet
(
has_bn
=
True
,
use_adaptive_pooling
=
use_adaptive_pooling
)
checkpoint
=
mge
.
load
(
model_path
)
net
.
load_state_dict
(
checkpoint
[
"net_init"
])
lr
=
checkpoint
[
"sgd_lr"
]
opt
=
SGD
(
net
.
parameters
(),
lr
=
lr
)
gm
=
ad
.
GradManager
().
attach
(
net
.
parameters
())
data
=
Tensor
(
checkpoint
[
"data"
],
dtype
=
np
.
float32
)
label
=
Tensor
(
checkpoint
[
"label"
],
dtype
=
np
.
int32
)
if
max_err
is
None
:
max_err
=
1e-5
train_func
=
train
if
use_jit
:
train_func
=
jit
.
trace
(
train_func
,
symbolic
=
use_symbolic
,
sublinear_memory_config
=
sublinear_memory_config
,
)
opt
.
clear_grad
()
loss
=
train_func
(
data
,
label
,
net
,
opt
,
gm
)
opt
.
step
()
np
.
testing
.
assert_allclose
(
loss
.
numpy
(),
checkpoint
[
"loss"
],
atol
=
max_err
)
for
param
,
param_ref
in
zip
(
net
.
state_dict
().
items
(),
checkpoint
[
"net_updated"
].
items
()
):
assert
param
[
0
]
==
param_ref
[
0
]
if
"bn"
in
param
[
0
]:
ref
=
param_ref
[
1
].
reshape
(
param
[
1
].
shape
)
np
.
testing
.
assert_allclose
(
param
[
1
],
ref
,
atol
=
max_err
)
else
:
np
.
testing
.
assert_allclose
(
param
[
1
],
param_ref
[
1
],
atol
=
max_err
)
def
run_eval
(
model_path
,
use_symbolic
,
sublinear_memory_config
=
None
,
max_err
=
None
,
use_adaptive_pooling
=
False
,
):
"""
Load the model with test cases and run the training for one iter.
The loss and updated weights are compared with reference value to verify the correctness.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Please think twice before you do so.
"""
net
=
MnistNet
(
has_bn
=
True
,
use_adaptive_pooling
=
use_adaptive_pooling
)
checkpoint
=
mge
.
load
(
model_path
)
net
.
load_state_dict
(
checkpoint
[
"net_init"
])
data
=
Tensor
(
checkpoint
[
"data"
],
dtype
=
np
.
float32
)
def
eval_fun
(
data
,
*
,
net
=
None
):
pred
=
net
(
data
)
return
pred
refer_value
=
eval_fun
(
data
,
net
=
net
)
eval_fun
=
jit
.
trace
(
eval_fun
,
symbolic
=
use_symbolic
)
for
_
in
range
(
3
):
new_value
=
eval_fun
(
data
,
net
=
net
)
np
.
testing
.
assert_allclose
(
new_value
.
numpy
(),
refer_value
.
numpy
(),
atol
=
max_err
)
@
pytest
.
mark
.
skip
(
reason
=
"close it when cu111 ci"
)
def
test_correctness
():
if
mge
.
is_cuda_available
():
model_name
=
"mnist_model_with_test.mge"
else
:
model_name
=
"mnist_model_with_test_cpu.mge"
model_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
model_name
)
set_execution_strategy
(
Strategy
.
HEURISTIC
|
Strategy
.
REPRODUCIBLE
)
run_train
(
model_path
,
False
,
False
,
max_err
=
1e-5
)
run_train
(
model_path
,
True
,
False
,
max_err
=
1e-5
)
run_train
(
model_path
,
True
,
True
,
max_err
=
1e-5
)
# sublinear
config
=
SublinearMemoryConfig
(
genetic_nr_iter
=
10
)
run_train
(
model_path
,
True
,
True
,
sublinear_memory_config
=
config
,
max_err
=
1e-5
,
)
run_eval
(
model_path
,
False
,
max_err
=
1e-7
)
run_eval
(
model_path
,
True
,
max_err
=
1e-7
)
@
pytest
.
mark
.
skip
(
reason
=
"close it when cu111 ci"
)
def
test_correctness_use_adaptive_pooling
():
if
mge
.
is_cuda_available
():
model_name
=
"mnist_model_with_test.mge"
else
:
model_name
=
"mnist_model_with_test_cpu.mge"
model_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
model_name
)
set_execution_strategy
(
"HEURISTIC_REPRODUCIBLE"
)
run_train
(
model_path
,
False
,
False
,
max_err
=
1e-5
,
use_adaptive_pooling
=
True
)
run_train
(
model_path
,
True
,
False
,
max_err
=
1e-5
,
use_adaptive_pooling
=
True
)
run_train
(
model_path
,
True
,
True
,
max_err
=
1e-5
,
use_adaptive_pooling
=
True
)
# sublinear
config
=
SublinearMemoryConfig
(
genetic_nr_iter
=
10
)
run_train
(
model_path
,
True
,
True
,
sublinear_memory_config
=
config
,
max_err
=
1e-5
,
use_adaptive_pooling
=
True
,
)
run_eval
(
model_path
,
False
,
max_err
=
1e-7
,
use_adaptive_pooling
=
True
)
run_eval
(
model_path
,
True
,
max_err
=
1e-7
,
use_adaptive_pooling
=
True
)
imperative/python/test/integration/test_dp_correctness.py
浏览文件 @
597a1e79
...
...
@@ -7,11 +7,8 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
os
import
platform
import
re
import
subprocess
import
sys
from
math
import
ceil
import
numpy
as
np
import
pytest
...
...
@@ -20,8 +17,6 @@ import megengine as mge
import
megengine.autodiff
as
ad
import
megengine.distributed
as
dist
import
megengine.functional
as
F
from
megengine.device
import
get_default_device
,
set_default_device
from
megengine.functional.debug_param
import
set_execution_strategy
from
megengine.module
import
AvgPool2d
,
BatchNorm2d
,
Conv2d
,
Linear
,
Module
from
megengine.optimizer
import
SGD
from
megengine.tensor
import
Tensor
...
...
@@ -198,5 +193,7 @@ def run_test(
def
test_dp_correctness
():
model_name
=
"mnist_model_with_test.mge"
model_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
model_name
)
set_execution_strategy
(
"HEURISTIC_REPRODUCIBLE"
)
old
=
mge
.
config
.
deterministic_kernel
mge
.
config
.
deterministic_kernel
=
True
run_test
(
model_path
,
False
,
False
,
max_err
=
5e-5
)
mge
.
config
.
deterministic_kernel
=
old
imperative/python/test/unit/module/test_conv.py
浏览文件 @
597a1e79
...
...
@@ -11,21 +11,9 @@ import itertools
import
numpy
as
np
import
pytest
import
megengine
as
mge
import
megengine.module
as
M
from
megengine
import
Parameter
,
tensor
from
megengine.functional.debug_param
import
(
get_execution_strategy
,
set_execution_strategy
,
)
from
megengine.module
import
ConvTranspose2d
,
ConvTranspose3d
,
LocalConv2d
@
pytest
.
fixture
def
reproducible
():
old
=
get_execution_strategy
()
set_execution_strategy
(
"HEURISTIC_REPRODUCIBLE"
)
yield
set_execution_strategy
(
old
)
from
megengine
import
tensor
# NOTE: test in module for convenience. should really test in functional
...
...
@@ -33,7 +21,9 @@ def reproducible():
"name"
,
[
"Conv1d"
,
"Conv2d"
,
"Conv3d"
,
"ConvTranspose2d"
,
"ConvTranspose3d"
,
"LocalConv2d"
],
)
def
test_conv_dtype_promotion
(
name
,
reproducible
):
def
test_conv_dtype_promotion
(
name
):
old
=
mge
.
config
.
deterministic_kernel
mge
.
config
.
deterministic_kernel
=
True
N
,
Ci
,
Co
,
K
=
2
,
16
,
32
,
3
S
=
(
7
,)
*
int
(
name
[
-
2
])
if
"Local"
in
name
:
...
...
@@ -42,3 +32,4 @@ def test_conv_dtype_promotion(name, reproducible):
m
=
getattr
(
M
,
name
)(
Ci
,
Co
,
K
)
x
=
tensor
(
np
.
random
.
random
(
size
=
(
N
,
Ci
)
+
S
).
astype
(
"float16"
))
np
.
testing
.
assert_equal
(
m
(
x
).
numpy
(),
m
(
x
.
astype
(
"float32"
)).
numpy
())
mge
.
config
.
deterministic_kernel
=
old
imperative/python/test/unit/quantization/test_op.py
浏览文件 @
597a1e79
...
...
@@ -255,9 +255,8 @@ def test_conv_bias_int4():
run
(
10
,
36
,
8
,
46
,
26
,
2
,
2
,
2
,
1
,
1
,
2
,
True
,
"relu"
)
@
pytest
.
mark
.
require_ngpu
(
1
)
@
pytest
.
mark
.
skipif
(
get_cuda_compute_capability
(
0
)
<
61
,
get_
device_count
(
"gpu"
)
>
0
and
get_
cuda_compute_capability
(
0
)
<
61
,
reason
=
"does not support int8 when gpu compute capability less than 6.1"
,
)
def
test_conv_transpose2d
():
...
...
imperative/python/test/unit/utils/test_network_node.py
浏览文件 @
597a1e79
...
...
@@ -5,6 +5,7 @@ import platform
import
numpy
as
np
import
pytest
import
megengine
as
mge
import
megengine.core.tensor.dtype
as
dtype
import
megengine.core.tensor.megbrain_graph
as
G
import
megengine.functional
as
F
...
...
@@ -18,10 +19,6 @@ from megengine.device import (
get_device_count
,
is_cuda_available
,
)
from
megengine.functional.debug_param
import
(
get_execution_strategy
,
set_execution_strategy
,
)
from
megengine.functional.external
import
tensorrt_runtime_opr
from
megengine.jit.tracing
import
trace
from
megengine.tensor
import
Tensor
...
...
@@ -110,25 +107,30 @@ def test_matinv():
@
pytest
.
mark
.
parametrize
(
"
execution_strategy"
,
[
"HEURISTIC_REPRODUCIBLE"
,
"PROFILE_REPRODUCIBLE"
]
"
benchmark_kernel, max_err"
,
[(
False
,
None
),
(
True
,
1e-5
)],
)
def
test_matmul
(
execution_strategy
):
def
test_matmul
(
monkeypatch
,
benchmark_kernel
,
max_err
):
if
get_device_count
(
"gpu"
)
==
0
and
benchmark_kernel
:
return
monkeypatch
.
setenv
(
"MGE_FASTRUN_CACHE_TYPE"
,
"MEMORY"
)
old1
,
old2
=
(
mge
.
config
.
benchmark_kernel
,
mge
.
config
.
deterministic_kernel
,
)
mge
.
config
.
benchmark_kernel
=
benchmark_kernel
mge
.
config
.
deterministic_kernel
=
True
@
trace
(
symbolic
=
True
,
capture_as_const
=
True
)
def
fwd
(
data1
,
data2
):
return
F
.
matmul
(
data1
,
data2
)
old
=
get_execution_strategy
()
set_execution_strategy
(
execution_strategy
)
max_err
=
None
if
execution_strategy
==
"PROFILE_REPRODUCIBLE"
:
max_err
=
1e-5
data1
=
Tensor
(
np
.
random
.
random
((
32
,
64
)))
data2
=
Tensor
(
np
.
random
.
random
((
64
,
16
)))
result
=
fwd
(
data1
,
data2
)
check_pygraph_dump
(
fwd
,
[
data1
,
data2
],
[
result
],
max_err
=
max_err
)
set_execution_strategy
(
old
)
mge
.
config
.
benchmark_kernel
=
old1
mge
.
config
.
deterministic_kernel
=
old2
monkeypatch
.
delenv
(
"MGE_FASTRUN_CACHE_TYPE"
,
raising
=
False
)
def
test_batchmatmul
():
...
...
@@ -290,9 +292,8 @@ def test_deformable_ps_roi_pooling():
check_pygraph_dump
(
fwd
,
[
inp
,
rois
,
trans
],
[
result
])
@
pytest
.
mark
.
require_ngpu
(
1
)
@
pytest
.
mark
.
skipif
(
get_cuda_compute_capability
(
0
)
<
61
,
get_
device_count
(
"gpu"
)
>
0
and
get_
cuda_compute_capability
(
0
)
<
61
,
reason
=
"does not support int8 when gpu compute capability less than 6.1"
,
)
def
test_convbias
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录