Commit a3ea1f15

Authored on Nov 30, 2020 by Megvii Engine Team

feat(mgb/opr): add fast profile and combined Execution strategy

GitOrigin-RevId: 843dc3a7907bc6ec9a728ec6425b7910d9c136c5

Parent: 80f00643
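
In short, the execution strategy becomes a bit-combinable enum rather than a fixed set of variants, so the new "fast profile" mode is simply PROFILE combined with OPTMIZED. A minimal Python sketch of the idea (import paths are the ones used in the diff below; this assumes a MegEngine build containing this commit):

    from megengine.core.ops import builtin
    from megengine.functional.debug_param import set_execution_strategy

    Strategy = builtin.ops.Convolution.Strategy

    # "fast profile": profile only the well-optimized algorithms
    # (the same combination load_and_run's new --fast-profile flag maps to).
    set_execution_strategy(Strategy.PROFILE | Strategy.OPTMIZED)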
Showing 31 changed files with 361 additions and 301 deletions (+361, -301)
dnn/include/megdnn/basic_types.h                                    +56  -0
dnn/include/megdnn/oprs/base.h                                      +2   -0
dnn/scripts/gen_flatbuffers_schema.py                               +6   -2
dnn/scripts/gen_param_defs.py                                       +17  -5
dnn/scripts/gen_tablegen.py                                         +7   -2
dnn/src/common/algo_base.h                                          +0   -2
dnn/src/common/utils.h                                              +0   -55
dnn/src/cuda/convolution3d/backward_filter/algo.h                   +0   -1
imperative/python/megengine/functional/debug_param.py               +38  -14
imperative/python/test/integration/test_correctness_mnistnet.py     +4   -1
imperative/tablegen/autogen.cpp                                     +14  -0
imperative/tablegen/helper.h                                        +3   -0
sdk/load-and-run/src/mgblar.cpp                                     +33  -10
src/core/impl/utils/persistent_cache.cpp                            +5   -5
src/core/include/megbrain/common.h                                  +12  -0
src/core/include/megbrain/comp_node.h                               +0   -1
src/core/include/megbrain/graph/operator_node.h                     +0   -1
src/core/include/megbrain/graph/var_node.h                          +0   -1
src/core/include/megbrain/ir/base.td                                +5   -4
src/core/include/megbrain/utils/enum_class_bit.h                    +0   -89
src/core/include/megbrain/utils/persistent_cache.h                  +1   -2
src/gopt/impl/inference.cpp                                         +3   -6
src/gopt/test/inference.cpp                                         +16  -1
src/opr/impl/dnn/dnn.sereg.h                                        +0   -1
src/opr/impl/search_policy/algo_chooser.cpp                         +62  -59
src/opr/include/megbrain/opr/search_policy/algo_chooser.h           +18  -10
src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h    +0   -2
src/opr/test/blas.cpp                                               +4   -3
src/opr/test/dnn/convolution.cpp                                    +39  -23
test/src/include/megbrain/test/helper.h                             +1   -0
tools/param_defs/mgb_opr_param_defs.py                              +15  -1
dnn/include/megdnn/basic_types.h
@@ -506,10 +506,66 @@ struct DynOutMallocPolicyCall {
     }
 };
 
+template <typename T>
+class EnumClassBit {
+    std::underlying_type_t<T> m_val;
+
+    constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
+
+public:
+    constexpr EnumClassBit(T v)
+            : m_val(static_cast<std::underlying_type_t<T>>(v)) {}
+
+    constexpr operator T() const { return static_cast<T>(m_val); }
+
+    constexpr explicit operator bool() const { return m_val; }
+
+#define DEF_OPR(op)                                                      \
+    constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
+        return m_val op rhs.m_val;                                      \
+    }
+
+    DEF_OPR(&)
+    DEF_OPR(|)
+    DEF_OPR(^)
+
+    constexpr EnumClassBit operator~() const { return ~m_val; }
+
+#undef DEF_OPR
+};
+
 #endif  // MEGDNN_CC_HOST
 
 }  // namespace megdnn
 
+#define _MEGDNN_DECBO_SINGLE_OPR(cls, op)                                    \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
+        return ::megdnn::EnumClassBit<cls>(x)                                \
+                op ::megdnn::EnumClassBit<cls>(y);                           \
+    }                                                                        \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator op(                \
+            ::megdnn::EnumClassBit<cls> x, cls y) {                          \
+        return x op ::megdnn::EnumClassBit<cls>(y);                          \
+    }
+
+#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op)          \
+    inline constexpr cls& operator op##=(cls& x, cls y) { \
+        x = x op ::megdnn::EnumClassBit<cls>(y);          \
+        return x;                                         \
+    }
+
+#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)    \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, &)          \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, |)          \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, ^)          \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &)   \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |)   \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^)   \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
+        return ~::megdnn::EnumClassBit<cls>(x);                     \
+    }
+
 #include "megdnn/internal/visibility_epilogue.h"
 
 // vim: syntax=cpp.doxygen
dnn/include/megdnn/oprs/base.h
@@ -251,6 +251,8 @@ protected:
     Handle::HandleType m_handle_type = Handle::HandleType::NAIVE;
 };
 
+MEGDNN_DEF_ENUM_CLASS_BIT_OPR(Algorithm::Attribute)
+
 //! policy for executing the operator
 struct ExecutionPolicy {
     //! INVALID_ALGO_TYPE algo_type means using heuristic
dnn/scripts/gen_flatbuffers_schema.py
@@ -53,9 +53,13 @@ class FlatBuffersWriter(IndentWriterBase):
         e = self._enums[(p, e)]
         self._write_doc(e.name)
         self._write("enum %s%s : uint {", p, e.name, indent=1)
-        for member in e.members:
+        for idx, member in enumerate(e.members):
             self._write_doc(member)
-            self._write("%s,", scramble_enum_member_name(str(member)))
+            if e.combined:
+                self._write("%s=%d,", scramble_enum_member_name(str(member)),
+                            1 << idx)
+            else:
+                self._write("%s,", scramble_enum_member_name(str(member)))
         self._write("}\n", indent=-1)
 
     def _write_doc(self, doc):
dnn/scripts/gen_param_defs.py
@@ -80,13 +80,13 @@ class member_defs:
         :attr member_alias: list of (member, alias) pairs
         """
         __slots__ = ['name', 'name_field', 'members', 'default',
-                     'member_alias']
+                     'member_alias', 'combined']
 
         all_enums = {}
         """(param_name, name) => enum"""
 
         def __init__(self, param_name, name, name_field, members, default,
-                     member_alias):
+                     member_alias, combined=False):
             name = member_defs.Doc.make(name)
             assert name.id[0].isupper()
             members = tuple(map(member_defs.Doc.make, members))
@@ -97,6 +97,7 @@ class member_defs:
                 default = name_field.index(default)
             assert isinstance(default, int)
             self.name = name
+            self.combined = combined
             self.name_field = self.get_name_field(name.id, name_field)
             self.members = members
             self.default = default
@@ -197,6 +198,12 @@ class ParamDef:
             self.name.id, name, name_field, members, default, member_alias))
         return self
 
+    def add_bit_combination_enum(self, name, *members, default=0,
+                                 name_field=None, member_alias=[]):
+        self.members.append(member_defs.Enum(
+            self.name.id, name, name_field, members, default,
+            member_alias, True))
+        return self
+
     def add_enum_alias(self, name, src_class, src_name=None, name_field=None,
                        default=None):
         self.members.append(member_defs.EnumAlias(
@@ -463,8 +470,12 @@ class SerializedDType(_ParamDefBase):
         for idx, emem in enumerate(e.members):
             self._write('%s = "%s"', emem, emem)
             self._write_doc(emem)
-            self._enum_member2num.append('id({}.{}):{}'.format(
-                qualname, emem, idx))
+            if e.combined:
+                self._enum_member2num.append('id({}.{}):{}'.format(
+                    qualname, emem, 1 << idx))
+            else:
+                self._enum_member2num.append('id({}.{}):{}'.format(
+                    qualname, emem, idx))
 
         for emem, emem_alis in e.member_alias:
             self._write('%s = %s', emem_alis, emem)
@@ -622,6 +633,8 @@ class CPPWriter(IndentWriterBase):
         for idx, i in enumerate(e.members):
             self._write_doc(i)
             v = '{} = {}'.format(i, idx)
+            if e.combined:
+                v = '{} = 1 << {}'.format(i, idx)
             if i is not e.members[-1] or e.member_alias:
                 v += ','
             self._write(v)
@@ -672,7 +685,6 @@ class CPPEnumValueWriter(CPPWriter):
             self._write('static const uint32_t %s = %s;', alias, mem)
         self._write('};', indent=-1)
 
     def _on_member_enum_alias(self, e):
-        s = e.src_enum
         self._write('typedef %s::%s %s;', e.src_class, e.src_name, e.name)
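
The new add_bit_combination_enum entry point mirrors add_enum; a hedged sketch of how a param definition would declare a combined enum with it (the member names are the ones actually added later in this commit in tools/param_defs/mgb_opr_param_defs.py; the plain-string form of the members is an illustration only):

    # Sketch only: declaring a combined enum with the new helper; compare the
    # real ExecutionPolicy version=1 definition later in this commit.
    (pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator',
          version=1).
     add_bit_combination_enum('Strategy',
                              'HEURISTIC', 'PROFILE', 'REPRODUCIBLE', 'OPTMIZED'))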
dnn/scripts/gen_tablegen.py
@@ -91,12 +91,17 @@ class ConverterWriter(IndentWriterBase):
         def format(v):
             return '\"{}\"'.format(str(v))
         enum_def += ','.join(format(i) for i in e.members)
-        enum_def += "]"
+        if e.combined:
+            enum_def += "], 1"
+        else:
+            enum_def += "], 0"
 
         if ENUM_TO_STRING_SPECIAL_RULES.count((p.name, e.name)):
             enum_def += ", 1"  # whether generate ToStringTrait
         enum_def += ">"
         self._write("def {} : {};".format(td_class, enum_def))
 
         if self._skip_current_param:
             return
dnn/src/common/algo_base.h
@@ -21,8 +21,6 @@
 namespace megdnn {
 
-MEGDNN_DEF_ENUM_CLASS_BIT_OPR(AlgoAttribute)
-
 #define MEGDNN_DECL_ALGO_TYPE(_type)                                 \
     uint32_t type() const override {                                 \
         return static_cast<std::underlying_type<AlgoType>::type>(   \
dnn/src/common/utils.h
@@ -692,61 +692,6 @@ inline void* get_origin_ptr(const TensorND* tensor, void* ptr) {
             tensor->layout.span().low_byte);
 }
 
-template <typename T>
-class EnumClassBit {
-    std::underlying_type_t<T> m_val;
-
-    constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
-
-public:
-    constexpr EnumClassBit(T v)
-            : m_val(static_cast<std::underlying_type_t<T>>(v)) {}
-
-    constexpr operator T() const { return static_cast<T>(m_val); }
-
-    constexpr explicit operator bool() const { return m_val; }
-
-#define DEF_OPR(op)                                                      \
-    constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
-        return m_val op rhs.m_val;                                      \
-    }
-
-    DEF_OPR(&)
-    DEF_OPR(|)
-    DEF_OPR(^)
-
-    constexpr EnumClassBit operator~() const { return ~m_val; }
-
-#undef DEF_OPR
-};
-
-#define _MEGDNN_DECBO_SINGLE_OPR(cls, op)                                    \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
-        return ::megdnn::EnumClassBit<cls>(x)                                \
-                op ::megdnn::EnumClassBit<cls>(y);                           \
-    }                                                                        \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator op(                \
-            ::megdnn::EnumClassBit<cls> x, cls y) {                          \
-        return x op ::megdnn::EnumClassBit<cls>(y);                          \
-    }
-
-#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op)          \
-    inline constexpr cls& operator op##=(cls& x, cls y) { \
-        x = x op ::megdnn::EnumClassBit<cls>(y);          \
-        return x;                                         \
-    }
-
-#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)    \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, &)          \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, |)          \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, ^)          \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &)   \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |)   \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^)   \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
-        return ~::megdnn::EnumClassBit<cls>(x);                     \
-    }
-
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
dnn/src/cuda/convolution3d/backward_filter/algo.h
@@ -218,4 +218,3 @@ public:
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
imperative/python/megengine/functional/debug_param.py
@@ -8,9 +8,12 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import os
 
+from ..core.ops import builtin
 from ..logger import get_logger
 from ..utils.deprecation import deprecated
 
+Strategy = builtin.ops.Convolution.Strategy
+
 _execution_strategy = os.getenv("MEGENGINE_EXECUTION_STRATEGY", "HEURISTIC")
 
 if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
@@ -19,7 +22,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
     )
 
 
-def get_execution_strategy() -> str:
+def get_execution_strategy() -> Strategy:
     """
     Returns the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
@@ -28,12 +31,22 @@ def get_execution_strategy() -> str:
     return _execution_strategy
 
 
-def set_execution_strategy(option: str):
+def set_execution_strategy(option):
     """
     Sets the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
 
-    :param option: Decides how :class:`~.Conv2d` and :func:'~.matmul' algorithms are chosen.
-        Available values:
+    :param option: Decides how :class:`~.Conv2d`and :func:'~.matmul' algorithms are chosen.
+        Available value Strategy
+
+        * HEURISTIC uses heuristic to choose the fastest algorithm.
+        * PROFILE runs possible algorithms on real device to find the best one.
+        * REPRODUCIBLE uses the algorithms that is reproducible.
+        * OPTMIZED uses the algorithms that is optimized.
+
+        The default strategy is HEURISTIC, this options can be combined to
+        form a combination option, e.g. PROFILE | REPRODUCIBLE
+        can combined a option that uses the fastest of profiling result that is also reproducible.
+
+        Available values string:
 
         * 'HEURISTIC' uses heuristic to choose the fastest algorithm.
         * 'PROFILE' runs possible algorithms on real device to find the best one.
@@ -45,18 +58,29 @@ def set_execution_strategy(option: str):
     It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'.
     """
-    valid_option = (
-        "HEURISTIC",
-        "PROFILE",
-        "PROFILE_HEURISTIC",
-        "PROFILE_REPRODUCIBLE",
-        "HEURISTIC_REPRODUCIBLE",
-    )
-    if not option in valid_option:
-        raise ValueError("Valid option can only be one of {}".format(valid_option))
+    valid_string_option = {
+        "REPRODUCIBLE": Strategy.REPRODUCIBLE,
+        "HEURISTIC": Strategy.HEURISTIC,
+        "PROFILE": Strategy.PROFILE,
+    }
 
     global _execution_strategy  # pylint: disable=global-statement
-    _execution_strategy = option
+    if isinstance(option, Strategy):
+        _execution_strategy = option
+        return
+
+    assert isinstance(option, str)
+
+    strategy_tmp = Strategy(0)
+    for opt in option.split("_"):
+        if not opt in valid_string_option:
+            raise ValueError(
+                "Valid option can only be one of {}, or combine them with '_'.".format(
                    valid_string_option.keys()
+                )
+            )
+        strategy_tmp = strategy_tmp | valid_string_option[opt]
+    _execution_strategy = strategy_tmp
 
 
 @deprecated(version="1.3", reason="use get_execution_strategy() instead")
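
For reference, both call forms accepted by the reworked set_execution_strategy shown in the hunk above; a sketch assuming a build that contains this commit:

    from megengine.core.ops import builtin
    from megengine.functional.debug_param import set_execution_strategy

    Strategy = builtin.ops.Convolution.Strategy

    # Enum form: strategies are or-ed together directly.
    set_execution_strategy(Strategy.PROFILE | Strategy.REPRODUCIBLE)

    # Legacy string form: parts are split on "_" and or-ed internally.
    set_execution_strategy("PROFILE_REPRODUCIBLE")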
imperative/python/test/integration/test_correctness_mnistnet.py
@@ -19,6 +19,7 @@ import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import jit
 from megengine.core._trace_option import set_symbolic_shape
+from megengine.core.ops import builtin
 from megengine.core.tensor.utils import make_shape_tuple
 from megengine.functional.debug_param import set_execution_strategy
 from megengine.jit import SublinearMemoryConfig
@@ -33,6 +34,8 @@ from megengine.module import (
 from megengine.optimizer import SGD
 from megengine.tensor import Tensor
 
+Strategy = builtin.ops.Convolution.Strategy
+
 
 def get_gpu_name():
     try:
@@ -242,7 +245,7 @@ def test_correctness():
     else:
         model_name = "mnist_model_with_test_cpu.mge"
     model_path = os.path.join(os.path.dirname(__file__), model_name)
-    set_execution_strategy("HEURISTIC_REPRODUCIBLE")
+    set_execution_strategy(Strategy.HEURISTIC | Strategy.REPRODUCIBLE)
 
     run_train(model_path, False, False, max_err=1e-5)
     run_train(model_path, True, False, max_err=1e-5)
imperative/tablegen/autogen.cpp
@@ -337,6 +337,20 @@ static void gen_op_def_pybind11_single(raw_ostream &os, MgbOp& op, EnumContext&
                     className, attr->getEnumName(), i));
         }
+        if (attr->getEnumCombinedFlag()) {
+            //! define operator |
+            os << formatv(
+                    "\n        .def(\"__or__\", []({0}::{1} s0, {0}::{1} s1) {{ "
+                    "\n             return static_cast<{0}::{1}>(uint32_t(s0) | uint32_t(s1));"
+                    "\n        })",
+                    className, attr->getEnumName());
+            //! define operator &
+            os << formatv(
+                    "\n        .def(\"__and__\", []({0}::{1} s0, {0}::{1} s1) {{"
+                    "\n             return static_cast<{0}::{1}>(uint32_t(s0) & uint32_t(s1));"
+                    "\n        })",
+                    className, attr->getEnumName());
+        }
         os << formatv(
                 "\n    .def(py::init([](const std::string& in) {"
                 "\n        auto&& str = normalize_enum(in);"
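
The practical effect of these generated bindings is that a combined enum can be built and masked directly from Python; a hedged sketch (assuming only the operators added here are available on the generated class):

    from megengine.core.ops import builtin

    Strategy = builtin.ops.Convolution.Strategy

    # __or__ / __and__ are emitted only for enums flagged as combined.
    combined = Strategy.PROFILE | Strategy.REPRODUCIBLE   # fold two flags into one value
    masked = combined & Strategy.PROFILE                  # mask a single flag back out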
imperative/tablegen/helper.h
@@ -77,6 +77,9 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase {
     bool supportToString() const {
         return getBaseRecord()->getValueAsBit("supportToString");
     }
+    bool getEnumCombinedFlag() const {
+        return getBaseRecord()->getValueAsBit("enumCombined");
+    }
 };
 
 struct MgbHashableAttrMixin : public MgbAttrWrapperBase {
sdk/load-and-run/src/mgblar.cpp
@@ -142,8 +142,16 @@
 #if MGB_ENABLE_FASTRUN
 R"__usage__(
   --fast-run
     Enable fast-run mode. Operators with multiple algorithms would be profiled
    on the real device with actual input shapes.
    This param will be deperated later, please replace with param --full-profile.
  --full-profile
    Enable full-profile mode. Operators with multiple algorithms would be profiled
    on the real device with actual input shapes, all algorithms will be profiled
    include naive algorithms.
    See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
  --fast-profile
    Enable fast-profile mode. Operators with multiple algorithms would be profiled
    on the real device with actual input shapes, this mode will only profile the
    well optimized algorithms to get the profile result fast.
    See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
 )__usage__"
 #endif
@@ -511,7 +519,8 @@ struct Args {
     bool disable_assert_throw = false;
     bool share_param_mem = false;
 #if MGB_ENABLE_FASTRUN
-    bool use_fast_run = false;
+    bool use_full_profile = false;
+    bool use_fast_profile = false;
 #endif
     bool reproducible = false;
     std::string fast_run_cache_path;
@@ -695,18 +704,20 @@ void run_test_st(Args &env) {
     using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
     S strategy = S::HEURISTIC;
 #if MGB_ENABLE_FASTRUN
-    if (env.use_fast_run) {
+    if (env.use_full_profile) {
         if (env.reproducible) {
-            strategy = S::PROFILE_REPRODUCIBLE;
+            strategy = S::PROFILE | S::REPRODUCIBLE;
         } else {
             strategy = S::PROFILE;
         }
+    } else if (env.use_fast_profile) {
+        strategy = S::PROFILE | S::OPTMIZED;
     } else if (env.reproducible) {
-        strategy = S::HEURISTIC_REPRODUCIBLE;
+        strategy = S::HEURISTIC | S::REPRODUCIBLE;
     }
 #else
     if (env.reproducible) {
-        strategy = S::HEURISTIC_REPRODUCIBLE;
+        strategy = S::HEURISTIC | S::REPRODUCIBLE;
     }
 #endif
     mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
@@ -729,11 +740,12 @@ void run_test_st(Args &env) {
                 std::make_shared<InFilePersistentCache>(buf.get(), flen));
 #if MGB_ENABLE_FASTRUN
         } else {
-            mgb_assert(env.use_fast_run, "fast-run should be enabled");
+            mgb_assert(env.use_full_profile || env.use_fast_profile,
+                       "fast-run or fast-profile should be enabled");
             PersistentCache::set_impl(
                     std::make_shared<InFilePersistentCache>());
         }
-        if (!env.use_fast_run)
+        if (!env.use_full_profile && !env.use_fast_profile)
 #endif
             mgb::gopt::enable_opr_use_profiling_cache_inplace(vars);
     }
@@ -1314,7 +1326,18 @@ Args Args::from_argv(int argc, char **argv) {
         }
 #if MGB_ENABLE_FASTRUN
         if (!strcmp(argv[i], "--fast-run")) {
-            ret.use_fast_run = true;
+            mgb_log_warn(
+                    "--fast-run param will be deperated later, please replace "
+                    "with --full-profile or --fast-profile.");
+            ret.use_full_profile = true;
+            continue;
+        }
+        if (!strcmp(argv[i], "--full-profile")) {
+            ret.use_full_profile = true;
+            continue;
+        }
+        if (!strcmp(argv[i], "--fast-profile")) {
+            ret.use_fast_profile = true;
             continue;
         }
 #endif
src/core/impl/utils/persistent_cache.cpp
@@ -188,7 +188,7 @@ AlgoChooserProfileCache::get(const Key &key) {
         auto entry_len = read_uint32();
         mgb_assert(buf + entry_len <= buf_end);
         auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT,
-                         &i.reproducible, &i.time, &i.workspace);
+                         &i.attribute, &i.time, &i.workspace);
         mgb_assert(nr == 3);
         buf += entry_len;
     }
@@ -210,10 +210,10 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
         auto &&cur = result[i];
         if (prev.workspace <= cur.workspace &&
-            prev.reproducible == cur.reproducible) {
+            prev.attribute == cur.attribute) {
             result.erase(result.begin() + i);
         } else {
             ++i;
         }
     }
@@ -235,8 +235,8 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
         write_uint32(0);
         pos = val.size();
         val.resize(pos + SPR_SIZE);
-        uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT,
-                               i.reproducible, i.time, i.workspace);
+        uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT,
+                               i.attribute, i.time, i.workspace);
         //! for memory boundary failed, snprintf ret do not contain \0
         nr += 1;
         mgb_assert(nr < SPR_SIZE);
src/core/include/megbrain/common.h
@@ -12,6 +12,8 @@
 #pragma once
 
 #include "megbrain_build_config.h"
+#include "megbrain/opr/param_defs.h"
+#include "megdnn/basic_types.h"
 
 #include <memory>
 #include <string>
@@ -242,6 +244,16 @@ inline constexpr std::size_t operator"" _z(unsigned long long n) {
     return n;
 }
 #endif
 
+#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
+    MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)
+
 }  // namespace mgb
 
+namespace megdnn {
+namespace param {
+MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy)
+}
+}  // namespace megdnn
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/core/include/megbrain/comp_node.h
@@ -12,7 +12,6 @@
 #pragma once
 
 #include "megbrain/utils/hash.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/metahelper.h"
 #include "megbrain/utils/thin/hash_table.h"
 #include "megbrain/utils/thread.h"
src/core/include/megbrain/graph/operator_node.h
@@ -16,7 +16,6 @@
 #include "megbrain/graph/symbol_var.h"
 #include "megbrain/utils/hashable.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/thin/hash_table.h"
 #include "megbrain/utils/small_vector.h"
src/core/include/megbrain/graph/var_node.h
@@ -12,7 +12,6 @@
 #pragma once
 
 #include "megbrain/graph/bases.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/comp_node_sync_manager.h"
 #include "megbrain/utils/small_vector.h"
 #include "megbrain/utils/mempool.h"
src/core/include/megbrain/ir/base.td
@@ -33,10 +33,11 @@ class MgbHashableAttrMixin {
   string reprFunction = "std::to_string($0)";
 }
 
-class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit toString> {
+class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit combined, bit toString> {
   string parentNamespace = namespace;
   string enumName = name;
   list<string> enumMembers = members;
+  bit enumCombined = combined;
   bit supportToString = toString;
 }
@@ -166,8 +167,8 @@ class MgbTupleAttr<list<MgbAttrWrapper> args>:
 }
 
 // -- enum types
-class MgbEnumAttr<string namespace, string enumName, list<string> members, bit toString=0>:
-  HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, toString> {
+class MgbEnumAttr<string namespace, string enumName, list<string> members, bit combined, bit toString=0>:
+  HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, combined, toString> {
   let storageType = "::mlir::IntegerAttr";
   let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
   let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0))";
@@ -176,7 +177,7 @@ class MgbEnumAttr<string namespace, string enumName, list<string> members, bit t
 }
 
 class MgbEnumAliasAttr<string namespace, string enumName, MgbEnumAttr base>:
-  MgbEnumAttr<namespace, enumName, base.enumMembers>, MgbAliasAttrMixin<base>;
+  MgbEnumAttr<namespace, enumName, base.enumMembers, 0>, MgbAliasAttrMixin<base>;
 
 // -- other types
 def MgbDTypeAttr: HashableAttr<"::megdnn::DType"> {
src/core/include/megbrain/utils/enum_class_bit.h
deleted file (mode 100644 → 0); previous content:

/**
 * \file src/core/include/megbrain/utils/enum_class_bit.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include <type_traits>

namespace mgb {
    template<typename T>
    class EnumClassBit {
        std::underlying_type_t<T> m_val;

        constexpr EnumClassBit(std::underlying_type_t<T> v):
            m_val(v)
        {
        }

        public:
            constexpr EnumClassBit(T v):
                m_val(static_cast<std::underlying_type_t<T>>(v))
            {
            }

            constexpr operator T() const {
                return static_cast<T>(m_val);
            }

            constexpr explicit operator bool() const {
                return m_val;
            }

#define DEF_OPR(op) \
            constexpr EnumClassBit operator op (\
                    const EnumClassBit &rhs) const { \
                return m_val op rhs.m_val; \
            }

            DEF_OPR(&)
            DEF_OPR(|)
            DEF_OPR(^)

            constexpr EnumClassBit operator ~ () const {
                return ~m_val;
            }

#undef DEF_OPR
    };
}

#define _MGB_DECBO_SINGLE_OPR(cls, op) \
    inline constexpr ::mgb::EnumClassBit<cls> operator op (cls x, cls y) { \
        return ::mgb::EnumClassBit<cls>(x) op ::mgb::EnumClassBit<cls>(y); \
    } \
    inline constexpr ::mgb::EnumClassBit<cls> operator op ( \
            ::mgb::EnumClassBit<cls> x, cls y) { \
        return x op ::mgb::EnumClassBit<cls>(y); \
    }

#define _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, op) \
    inline constexpr cls& operator op##= (cls& x, cls y) { \
        x = x op ::mgb::EnumClassBit<cls>(y); \
        return x; \
    }

#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
    _MGB_DECBO_SINGLE_OPR(cls, &) \
    _MGB_DECBO_SINGLE_OPR(cls, |) \
    _MGB_DECBO_SINGLE_OPR(cls, ^) \
    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, &) \
    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, |) \
    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \
    inline constexpr ::mgb::EnumClassBit<cls> operator ~ (cls x) { \
        return ~::mgb::EnumClassBit<cls>(x); \
    } \

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/core/include/megbrain/utils/persistent_cache.h
@@ -100,8 +100,7 @@ namespace mgb {
         struct ResultEntry {
             std::string algo;    //! identifier of the algorithm
-            //! sscanf will up bool as int
-            int reproducible;    //! whether algorithm is reproducible
+            uint32_t attribute;  //! algo attribute, e.g. reproducible
             double time;         //! execution time in seconds
             size_t workspace;    //! workspace in bytes
         };
src/gopt/impl/inference.cpp
@@ -54,7 +54,6 @@ using namespace gopt;
 namespace {
 template <typename SharedDeviceTensor, typename MultipleDeviceTensorHolder>
 void param_merge(OptState& opt_state) {
     auto rewriter = opt_state.graph().make_rewriter();
@@ -102,7 +101,7 @@ void param_merge(OptState& opt_state) {
     rewriter.apply_inplace();
 }
-}
+}  // namespace
 
 /* ================ global functions ================ */
@@ -190,12 +189,10 @@ void gopt::enable_opr_algo_profiling_inplace(
 void gopt::enable_opr_use_profiling_cache_inplace(
         const VarNodeArrayView& dest_vars) {
-    modify_opr_algo_strategy_inplace(
-            dest_vars,
-            opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy::
-                    PROFILE_HEURISTIC);
+    using S = megdnn::param::ExecutionPolicy::Strategy;
+    modify_opr_algo_strategy_inplace(dest_vars, S::PROFILE | S::HEURISTIC);
 }
 
 void gopt::set_opr_algo_workspace_limit_inplace(
         const VarNodeArrayView& dest_vars, size_t workspace_limit) {
     static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>
src/gopt/test/inference.cpp
@@ -1693,7 +1693,22 @@ TEST(TestGoptInference, ProfileCache) {
     using S = opr::Convolution::ExecutionPolicy::Strategy;
     ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
     gopt::enable_opr_use_profiling_cache_inplace({z + 2.3f});
-    ASSERT_EQ(S::PROFILE_HEURISTIC, conv.execution_policy().strategy);
+    ASSERT_EQ(S::PROFILE | S::HEURISTIC, conv.execution_policy().strategy);
+}
+
+TEST(TestGoptInference, FastProfileCache) {
+    HostTensorGenerator<> gen;
+    auto graph = ComputingGraph::make();
+    auto host_x = gen({4, 3, 8, 9}), host_y = gen({2, 3, 3, 3});
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
+         y = opr::Host2DeviceCopy::make(*graph, host_y),
+         z = opr::Convolution::make(x, y);
+    auto&& conv = z.node()->owner_opr()->cast_final_safe<opr::Convolution>();
+    using S = opr::Convolution::ExecutionPolicy::Strategy;
+    ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
+    gopt::modify_opr_algo_strategy_inplace({z + 2.3f},
+                                           S::PROFILE | S::OPTMIZED);
+    ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy);
 }
 
 TEST(TestGoptInference, AlgoWorkspaceLimit) {
src/opr/impl/dnn/dnn.sereg.h
@@ -20,7 +20,6 @@
 #include "megbrain/opr/dnn/lrn.h"
 #include "megbrain/opr/dnn/fake_quant.h"
 #include "megbrain/opr/dnn/tqt.h"
 #include "megbrain/serialization/sereg.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs/nn.h"
src/opr/impl/search_policy/algo_chooser.cpp
@@ -284,8 +284,9 @@ namespace mgb {
 namespace opr {
 
 template <typename Opr>
-void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
-    if (ctx.get_profile_result_from_cache(require_reproducible).valid())
+void AlgoChooser<Opr>::profile(ExeContext& ctx,
+                               ExecutionStrategy select_strategy) {
+    if (ctx.get_profile_result_from_cache(select_strategy).valid())
         return;
     AlgoChooserProfileCache::Result prof_rst;
@@ -305,7 +306,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, ExecutionStrategy select_strategy) {
                   algo.name.c_str(), str_on_inp_shape.c_str());
         ImplExecutionPolicy policy;
         policy.algo = algo.desc;
-        ctx.construct_execution_policy(require_reproducible, policy);
+        ctx.construct_execution_policy(select_strategy, policy);
         if (ctx.get_workspace_size_bytes(policy) >= workspace_limit)
             continue;
@@ -354,7 +355,8 @@
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
+AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx,
+                                    ExecutionStrategy select_strategy,
                                     bool enable_update) {
     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile")))
     if (ctx.owner_graph()->options().no_profiling_on_shape_change) {
@@ -376,11 +378,11 @@
                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
                     _item.param, ctx.mgb_opr(), ctx.comp_node(),
                     ctx.execution_policy(), ctx.allow_weight_preprocess());
-            AlgoChooser<_Opr>::profile(sub_ctx, require_reproducible);
+            AlgoChooser<_Opr>::profile(sub_ctx, select_strategy);
         });
     }
     typename AlgoChooser<Opr>::ImplExecutionPolicy policy;
-    ctx.construct_execution_policy(require_reproducible, policy);
+    ctx.construct_execution_policy(select_strategy, policy);
     return policy;
     MIDOUT_E
 }
@@ -402,11 +404,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
     ImplExecutionPolicy policy;
     if (auto algo_choose_hook = mgb_opr->algo_chooser()) {
         policy = algo_choose_hook(mgb_opr);
-        ctx.construct_execution_policy(
-                mgb_opr->execution_policy().strategy ==
-                        mixin::AlgoChooserHelper::ExecutionPolicy::Strategy::
-                                HEURISTIC_REPRODUCIBLE,
-                policy, false);
+        ctx.construct_execution_policy(
+                (ExecutionStrategy::HEURISTIC | ExecutionStrategy::REPRODUCIBLE),
+                policy, false);
     }
     if (!policy.algo.valid()) {
         policy = get_policy(ctx);
@@ -419,10 +419,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
     Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo);
     mgb_assert(palgo, "Unknown algo description");
     ret.append("): algo=" + std::string(palgo->name()));
-    ret.append(ssprintf(" workspace=%.2fMiB reproducible=%d",
+    ret.append(ssprintf(" workspace=%.2fMiB attirbute=%d",
                         workspace / (1024 * 1024.0),
-                        palgo->contain_attribute(
-                                megdnn::AlgoAttribute::REPRODUCIBLE)));
+                        static_cast<uint32_t>(palgo->attribute())));
     mgb_log_debug("%s", ret.c_str());
 
     megdnn_opr->execution_policy() = policy;
@@ -432,41 +431,39 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
         ExeContext& ctx) {
-    using S = mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
     MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE);
-    switch (ctx.execution_policy().strategy) {
-        case S::HEURISTIC:
-            return ctx.choose_by_heuristic();
-        case S::HEURISTIC_REPRODUCIBLE:
-            return ctx.choose_by_heuristic(true);
-        case S::PROFILE_HEURISTIC: {
-            ImplExecutionPolicy policy = choose_by_profile(ctx, false, false);
-            if (!policy.algo.valid())
-                policy = ctx.choose_by_heuristic();
-            return policy;
-        }
+    auto opr_strategy = ctx.execution_policy().strategy;
+    if ((opr_strategy & ExecutionStrategy::HEURISTIC) &&
+        (opr_strategy & ExecutionStrategy::PROFILE)) {
+        ImplExecutionPolicy policy =
+                choose_by_profile(ctx, opr_strategy, false);
+        if (!policy.algo.valid())
+            policy = ctx.choose_by_heuristic(opr_strategy);
+        return policy;
+    } else if ((opr_strategy & ExecutionStrategy::HEURISTIC)) {
+        return ctx.choose_by_heuristic(opr_strategy);
+    }
 #if MGB_ENABLE_FASTRUN
-        case S::PROFILE:
-            return choose_by_profile(ctx, false);
-        case S::PROFILE_REPRODUCIBLE:
-            return choose_by_profile(ctx, true);
+    else if (opr_strategy & ExecutionStrategy::PROFILE) {
+        return choose_by_profile(ctx, opr_strategy);
+    }
 #endif
-        default:
-            mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
+    else {
+        mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
     }
 }
 
 #define INST(Opr)                                                       \
     template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy              \
     AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx);              \
-    template void AlgoChooser<megdnn::Opr>::profile(                    \
-            ExeContext& ctx, bool require_reproducible);                \
+    template void AlgoChooser<megdnn::Opr>::profile(ExeContext& ctx,    \
+                                                    ExecutionStrategy); \
     template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy              \
     AlgoChooser<megdnn::Opr>::choose_by_profile(                        \
-            ExeContext& ctx, bool require_reproducible, bool enable_update); \
+            ExeContext& ctx, ExecutionStrategy, bool enable_update);    \
     template size_t AlgoChooser<megdnn::Opr>::setup_algo(               \
             const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \
             const MGBOpr* mgb_opr, bool allow_weight_preprocess);
 
 MGB_FOREACH_FASTRUN_OPR(INST)
@@ -498,7 +495,7 @@ AlgoChooser<Opr>::ExeContext::ExeContext(
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplAlgo
 AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
-        bool require_reproducible) const {
+        ExecutionStrategy select_strategy) const {
     MIDOUT_B(Opr,
              midout_iv(MGB_HASH_STR(
                      "AlgoChooser::ExeContext::get_profile_result_from_cache")))
@@ -522,7 +519,9 @@
     if (prof.empty())
        return {};
     for (auto&& i : prof) {
-        if ((!require_reproducible || i.reproducible)) {
+        if (!(select_strategy & ExecutionStrategy::REPRODUCIBLE) ||
+            static_cast<AlgoAttribute>(i.attribute) &
+                    AlgoAttribute::REPRODUCIBLE) {
             auto iter = algo_map.find(i.algo);
             mgb_assert(iter != algo_map.end(),
                        "algorithm %s exists in "
@@ -550,7 +549,8 @@
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
+AlgoChooser<Opr>::ExeContext::choose_by_heuristic(
+        ExecutionStrategy select_strategy) const {
     if (m_execution_policy.workspace_limit !=
         std::numeric_limits<decltype(
                 m_execution_policy.workspace_limit)>::max()) {
@@ -558,6 +558,8 @@
                    "workspace_limit should not be setted if choose algo by "
                    "heuristic");
     }
+    bool reproducible = static_cast<bool>(select_strategy &
+                                          ExecutionStrategy::REPRODUCIBLE);
     auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
             owner_graph(), m_cn, m_execution_policy.workspace_limit);
     ImplExecutionPolicy policy;
@@ -579,7 +581,8 @@
                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
                     _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
                     m_allow_weight_preprocess);
-            policy.sub_policy.push_back(sub_ctx.choose_by_heuristic(reproducible));
+            policy.sub_policy.push_back(
+                    sub_ctx.choose_by_heuristic(select_strategy));
         });
 
     return policy;
@@ -588,9 +591,8 @@
 template <typename Opr>
 std::vector<typename AlgoChooser<Opr>::ImplAlgo>
 AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
-    auto heu = choose_by_heuristic();
+    auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC);
     auto&& ret =
             APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
     bool found = false;
     for (size_t i = 0; i < ret.size(); ++i) {
         if (ret[i].desc == heu.algo) {
@@ -611,19 +613,21 @@
 template <typename Opr>
 void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
-        bool require_reproducible,
+        ExecutionStrategy select_strategy,
         typename AlgoChooser<Opr>::ImplExecutionPolicy& policy,
         bool retrive_from_cache) const {
+    bool reproducible = static_cast<bool>(select_strategy &
+                                          ExecutionStrategy::REPRODUCIBLE);
     if (!policy.algo.valid()) {
         if (retrive_from_cache) {
             policy.algo =
-                    get_profile_result_from_cache(require_reproducible).desc;
+                    get_profile_result_from_cache(select_strategy).desc;
        } else {
            auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
                    owner_graph(), m_cn, m_execution_policy.workspace_limit);
            policy.algo = APPLY(m_megdnn_opr->get_algorithm_info_heuristic(
                                        args..., workspace_limit,
-                                        require_reproducible),
+                                        reproducible),
                                m_layouts)
                                  .desc;
        }
@@ -647,7 +651,7 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
                     _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
                     m_allow_weight_preprocess);
             policy.sub_policy.push_back({});
-            sub_ctx.construct_execution_policy(require_reproducible,
+            sub_ctx.construct_execution_policy(select_strategy,
                                                policy.sub_policy.back(),
                                                retrive_from_cache);
         });
@@ -718,8 +722,7 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(
         return None;
     return AlgoChooserProfileCache::ResultEntry{
-            palgo->name(),
-            palgo->contain_attribute(megdnn::AlgoAttribute::REPRODUCIBLE),
+            palgo->name(), static_cast<uint32_t>(palgo->attribute()),
             rst.val().time, param.workspace};
 }
@@ -768,10 +771,10 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
             bool allow_weight_preprocess);                                  \
     template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy        \
     AlgoChooser<megdnn::Opr>::ExeContext::choose_by_heuristic(             \
-            bool reproducible) const;                                      \
+            ExecutionStrategy select_strategy) const;                      \
     template typename AlgoChooser<megdnn::Opr>::ImplAlgo                   \
     AlgoChooser<megdnn::Opr>::ExeContext::get_profile_result_from_cache(   \
-            bool require_reproducible) const;                              \
+            ExecutionStrategy select_strategy) const;                      \
     template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo>      \
     AlgoChooser<megdnn::Opr>::ExeContext::get_all_candidates() const;      \
     template size_t                                                        \
@@ -780,7 +783,7 @@
             policy) const;                                                  \
     template void                                                           \
     AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy(       \
-            bool require_reproducible,                                      \
+            ExecutionStrategy select_strategy,                              \
             typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy, \
             bool retrive_from_cache) const;                                 \
     template Maybe<AlgoChooserProfileCache::ResultEntry>                    \
src/opr/include/megbrain/opr/search_policy/algo_chooser.h
@@ -35,6 +35,13 @@ MGB_FOREACH_FASTRUN_OPR(cb)
 #undef cb
 
 namespace mgb {
+
+//! define logical operation of megdnn::param::ExecutionPolicy::Strategy::Enum
+//! and megdnn::detail::AlgoAttribute enum
+using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy;
+
+using AlgoAttribute = megdnn::AlgoAttribute;
+
 namespace opr {
 
 /* =================== AlgoChooser =================== */
@@ -103,7 +110,7 @@ public:
     const FixedTensorLayouts& layouts() const { return m_layouts; }
 
-    ImplExecutionPolicy choose_by_heuristic(bool reproducible = false) const;
+    ImplExecutionPolicy choose_by_heuristic(
+            ExecutionStrategy select_strategy) const;
 
     //! get all candidate algos, and the one choose_by_heuristic() is
     //! put first
@@ -126,19 +133,20 @@ public:
             const ImplExecutionPolicy& policy, double& timeout) const;
 
     //! get all profile algorithm from cache, return invalid if not exists
-    ImplAlgo get_profile_result_from_cache(bool require_reproducible) const;
+    ImplAlgo get_profile_result_from_cache(
+            ExecutionStrategy select_strategy) const;
 
     /**
      * \brief construct execution policy from cache or heuristic.
     *
-     * \param require_reproducible select algo which is reproducible
+     * \param select_strategy select algo which matched this strategy
     * \param policy execution policy
     * \param retrive_from_cache retrive algo from cache if set True, get
    * from heuristic otherwise.
     */
-    void construct_execution_policy(bool require_reproducible,
+    void construct_execution_policy(ExecutionStrategy select_strategy,
                                    ImplExecutionPolicy& policy,
                                    bool retrive_from_cache = true) const;
 
 private:
     Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const;
@@ -153,11 +161,11 @@ private:
     //! profile and save to cache
-    static void profile(ExeContext& ctx, bool require_reproducible);
+    static void profile(ExeContext& ctx, ExecutionStrategy select_strategy);
 
     static ImplExecutionPolicy choose_by_profile(ExeContext& ctx,
-                                                 bool require_reproducible,
+                                                 ExecutionStrategy select_strategy,
                                                  bool enable_update = true);
 
 public:
     /*!
src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h
@@ -13,7 +13,6 @@
 #pragma once
 
 #include "megbrain/graph/operator_node.h"
 #include "megbrain/opr/param_defs.h"
 #include "megdnn/oprs/base.h"
 #include "megdnn/oprs/nn.h"
@@ -73,7 +72,6 @@ protected:
 };
 }  // namespace mixin
 }  // namespace opr
 }  // namespace mgb
src/opr/test/blas.cpp
@@ -429,10 +429,11 @@ TEST(TestOprDNN, MatrixMulExePolicy) {
     auto cn = CompNode::load("cpux");
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy : {S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto graph = ComputingGraph::make();
src/opr/test/dnn/convolution.cpp
@@ -355,11 +355,13 @@ TEST(TestOprDNN, ConvBiasExePolicy) {
     auto cn = CompNode::load("cpux");
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto graph = ComputingGraph::make();
         HostTensorGenerator<> gen;
@@ -397,7 +399,8 @@ TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) {
     auto cn = CompNode::load("cpux");
 
-    for (auto strategy : {S::PROFILE, S::PROFILE_REPRODUCIBLE}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) {
         auto graph = ComputingGraph::make();
         HostTensorGenerator<> gen;
@@ -439,10 +442,12 @@ TEST(TestOprDNN, ConvolutionExePolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         using Checker = AutoOprChecker<2, 1>;
@@ -522,10 +527,11 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         {S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE),
+          S(S::PROFILE | S::HEURISTIC)}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy : {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
 #endif
         using Checker = AutoOprChecker<2, 1>;
@@ -1183,9 +1189,12 @@ TEST(TestOprDNN, Convolution3DExePolicy) {
     using S = Policy::Strategy;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         using Checker = AutoOprChecker<2, 1>;
@@ -1660,10 +1669,12 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -1769,10 +1780,12 @@ TEST(TestOprDNN, DeformableConvForward) {
     Param param;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -1936,10 +1949,12 @@ TEST(TestOprDNN, BatchConvBiasForward) {
     param.sparse = Param::Sparse::DENSE;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto make_quantized = [&](SymbolVar x, const DType& dtype) {
@@ -2080,7 +2095,8 @@ TEST(TestOprDNN, HeuristicReproducible) {
     constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1;
 
-    for (auto strategy : {S::HEURISTIC, S::HEURISTIC_REPRODUCIBLE}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) {
         VarNode* bwd_flt;
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -2126,7 +2142,7 @@ TEST(TestOprDNN, HeuristicReproducible) {
             megdnn::Algorithm* palgo =
                     megdnn_opr->get_algorithm_from_desc(algo);
             mgb_assert(palgo, "Unknown algo description");
-            if (strategy == S::HEURISTIC_REPRODUCIBLE) {
+            if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) {
                 EXPECT_TRUE(palgo->contain_attribute(
                         megdnn::AlgoAttribute::REPRODUCIBLE));
             }
test/src/include/megbrain/test/helper.h
@@ -43,6 +43,7 @@ namespace megdnn {
         std::ostream& ostr, const DType& dt) {
     return ostr << dt.name();
 }
 }  // namespace megdnn
 
 namespace mgb {
tools/param_defs/mgb_opr_param_defs.py
@@ -18,7 +18,7 @@ pdef('PersistentOutputStorage').add_fields(
  add_const('int32', 'INVALID_AXIS', 'MAX_NDIM').
  add_fields('int32', 'axis', 'INVALID_AXIS'))
 
-(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator').
+(pdef('ExecutionPolicy', version=0, is_legacy=True).
  add_enum('Strategy',
           Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
           Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
@@ -33,6 +33,20 @@ pdef('PersistentOutputStorage').add_fields(
      Doc('workspace_limit', 'workspace limit in bytes'),
      str(2**64-1)+'ull'))
 
+(pdef('ExecutionPolicy',
+      'specify how to select an algorithm for an operator', version=1).
+ add_bit_combination_enum('Strategy',
+          Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
+          Doc('PROFILE',
+              'run possible algorithms on real device to find the best'),
+          Doc('REPRODUCIBLE',
+              'when profile or heuristic algo selection it require the algos'
+              'must be reproducible'),
+          Doc('OPTMIZED',
+              'profile require algos are optmized to achieve fast-profile')).
+ add_fields('uint64',
+            Doc('workspace_limit', 'workspace limit in bytes'),
+            str(2**64-1)+'ull'))
+
 (pdef('AssertEqual').
  add_fields('float32',
             Doc('maxerr', 'max allowed error; error is defined as the minimal '
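
Given the 1 << idx numbering rule added in gen_param_defs.py, the version-1 Strategy members declared above should end up with the following bit values; this is a sketch derived from that rule, not copied from generated code:

    # Expected bit values for ExecutionPolicy (version 1) Strategy members,
    # following the combined-enum numbering rule (1 << index of member):
    STRATEGY_BITS = {
        "HEURISTIC":    1 << 0,  # 1
        "PROFILE":      1 << 1,  # 2
        "REPRODUCIBLE": 1 << 2,  # 4
        "OPTMIZED":     1 << 3,  # 8
    }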