提交 a3ea1f15 编写于 作者: M Megvii Engine Team

feat(mgb/opr): add fast profile and combined Execution strategy

GitOrigin-RevId: 843dc3a7907bc6ec9a728ec6425b7910d9c136c5
上级 80f00643
......@@ -506,10 +506,66 @@ struct DynOutMallocPolicyCall {
}
};
template <typename T>
class EnumClassBit {
std::underlying_type_t<T> m_val;
constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
public:
constexpr EnumClassBit(T v)
: m_val(static_cast<std::underlying_type_t<T>>(v)) {}
constexpr operator T() const { return static_cast<T>(m_val); }
constexpr explicit operator bool() const { return m_val; }
#define DEF_OPR(op) \
constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
return m_val op rhs.m_val; \
}
DEF_OPR(&)
DEF_OPR(|)
DEF_OPR (^)
constexpr EnumClassBit operator~() const { return ~m_val; }
#undef DEF_OPR
};
#endif // MEGDNN_CC_HOST
} // namespace megdnn
#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \
inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
return ::megdnn::EnumClassBit<cls>(x) \
op ::megdnn::EnumClassBit<cls>(y); \
} \
inline constexpr ::megdnn::EnumClassBit<cls> operator op( \
::megdnn::EnumClassBit<cls> x, cls y) { \
return x op ::megdnn::EnumClassBit<cls>(y); \
}
#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \
inline constexpr cls& operator op##=(cls& x, cls y) { \
x = x op ::megdnn::EnumClassBit<cls>(y); \
return x; \
}
#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \
_MEGDNN_DECBO_SINGLE_OPR(cls, &) \
_MEGDNN_DECBO_SINGLE_OPR(cls, |) \
_MEGDNN_DECBO_SINGLE_OPR(cls, ^) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \
inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
return ~::megdnn::EnumClassBit<cls>(x); \
}
#include "megdnn/internal/visibility_epilogue.h"
// vim: syntax=cpp.doxygen
......@@ -251,6 +251,8 @@ protected:
Handle::HandleType m_handle_type = Handle::HandleType::NAIVE;
};
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(Algorithm::Attribute)
//! policy for executing the operator
struct ExecutionPolicy {
//! INVALID_ALGO_TYPE algo_type means using heuristic
......
......@@ -53,9 +53,13 @@ class FlatBuffersWriter(IndentWriterBase):
e = self._enums[(p, e)]
self._write_doc(e.name)
self._write("enum %s%s : uint {", p, e.name, indent=1)
for member in e.members:
for idx, member in enumerate(e.members):
self._write_doc(member)
self._write("%s,", scramble_enum_member_name(str(member)))
if e.combined:
self._write("%s=%d,", scramble_enum_member_name(str(member)),
1<<idx)
else:
self._write("%s,", scramble_enum_member_name(str(member)))
self._write("}\n", indent=-1)
def _write_doc(self, doc):
......
......@@ -80,13 +80,13 @@ class member_defs:
:attr member_alias: list of (member, alias) pairs
"""
__slots__ = ['name', 'name_field', 'members', 'default',
'member_alias']
'member_alias', 'combined']
all_enums = {}
"""(param_name, name) => enum"""
def __init__(self, param_name, name, name_field, members, default,
member_alias):
member_alias, combined = False):
name = member_defs.Doc.make(name)
assert name.id[0].isupper()
members = tuple(map(member_defs.Doc.make, members))
......@@ -97,6 +97,7 @@ class member_defs:
default = name_field.index(default)
assert isinstance(default, int)
self.name = name
self.combined = combined
self.name_field = self.get_name_field(name.id, name_field)
self.members = members
self.default = default
......@@ -197,6 +198,12 @@ class ParamDef:
self.name.id, name, name_field, members, default, member_alias))
return self
def add_bit_combination_enum(self, name, *members, default=0,
name_field=None, member_alias=[]):
self.members.append(member_defs.Enum(
self.name.id, name, name_field, members, default, member_alias, True))
return self
def add_enum_alias(self, name, src_class, src_name=None, name_field=None,
default=None):
self.members.append(member_defs.EnumAlias(
......@@ -463,8 +470,12 @@ class SerializedDType(_ParamDefBase):
for idx, emem in enumerate(e.members):
self._write('%s = "%s"', emem, emem)
self._write_doc(emem)
self._enum_member2num.append('id({}.{}):{}'.format(
qualname, emem, idx))
if e.combined:
self._enum_member2num.append('id({}.{}):{}'.format(
qualname, emem, 1<<idx))
else:
self._enum_member2num.append('id({}.{}):{}'.format(
qualname, emem, idx))
for emem, emem_alis in e.member_alias:
self._write('%s = %s', emem_alis, emem)
......@@ -622,6 +633,8 @@ class CPPWriter(IndentWriterBase):
for idx, i in enumerate(e.members):
self._write_doc(i)
v = '{} = {}'.format(i, idx)
if e.combined:
v = '{} = 1 << {}'.format(i, idx)
if i is not e.members[-1] or e.member_alias:
v += ','
self._write(v)
......@@ -672,7 +685,6 @@ class CPPEnumValueWriter(CPPWriter):
self._write('static const uint32_t %s = %s;', alias, mem)
self._write('};', indent=-1)
def _on_member_enum_alias(self, e):
s = e.src_enum
self._write('typedef %s::%s %s;', e.src_class, e.src_name, e.name)
......
......@@ -91,12 +91,17 @@ class ConverterWriter(IndentWriterBase):
def format(v):
return '\"{}\"'.format(str(v))
enum_def += ','.join(format(i) for i in e.members)
enum_def += "]"
if e.combined:
enum_def += "], 1"
else:
enum_def += "], 0"
if ENUM_TO_STRING_SPECIAL_RULES.count((p.name, e.name)):
enum_def += ", 1" # whether generate ToStringTrait
enum_def += ">"
self._write("def {} : {};".format(td_class, enum_def))
self._write("def {} : {};".format(td_class, enum_def))
if self._skip_current_param:
return
......
......@@ -21,8 +21,6 @@
namespace megdnn {
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(AlgoAttribute)
#define MEGDNN_DECL_ALGO_TYPE(_type) \
uint32_t type() const override { \
return static_cast<std::underlying_type<AlgoType>::type>( \
......
......@@ -692,61 +692,6 @@ inline void* get_origin_ptr(const TensorND* tensor, void* ptr) {
tensor->layout.span().low_byte);
}
template <typename T>
class EnumClassBit {
std::underlying_type_t<T> m_val;
constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
public:
constexpr EnumClassBit(T v)
: m_val(static_cast<std::underlying_type_t<T>>(v)) {}
constexpr operator T() const { return static_cast<T>(m_val); }
constexpr explicit operator bool() const { return m_val; }
#define DEF_OPR(op) \
constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
return m_val op rhs.m_val; \
}
DEF_OPR(&)
DEF_OPR(|)
DEF_OPR (^)
constexpr EnumClassBit operator~() const { return ~m_val; }
#undef DEF_OPR
};
#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \
inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
return ::megdnn::EnumClassBit<cls>(x) \
op ::megdnn::EnumClassBit<cls>(y); \
} \
inline constexpr ::megdnn::EnumClassBit<cls> operator op( \
::megdnn::EnumClassBit<cls> x, cls y) { \
return x op ::megdnn::EnumClassBit<cls>(y); \
}
#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \
inline constexpr cls& operator op##=(cls& x, cls y) { \
x = x op ::megdnn::EnumClassBit<cls>(y); \
return x; \
}
#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \
_MEGDNN_DECBO_SINGLE_OPR(cls, &) \
_MEGDNN_DECBO_SINGLE_OPR(cls, |) \
_MEGDNN_DECBO_SINGLE_OPR(cls, ^) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \
_MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \
inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
return ~::megdnn::EnumClassBit<cls>(x); \
}
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -218,4 +218,3 @@ public:
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -8,9 +8,12 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
from ..core.ops import builtin
from ..logger import get_logger
from ..utils.deprecation import deprecated
Strategy = builtin.ops.Convolution.Strategy
_execution_strategy = os.getenv("MEGENGINE_EXECUTION_STRATEGY", "HEURISTIC")
if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
......@@ -19,7 +22,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
)
def get_execution_strategy() -> str:
def get_execution_strategy() -> Strategy:
"""
Returns the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
......@@ -28,12 +31,22 @@ def get_execution_strategy() -> str:
return _execution_strategy
def set_execution_strategy(option: str):
def set_execution_strategy(option):
"""
Sets the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
:param option: Decides how :class:`~.Conv2d` and :func:'~.matmul' algorithms are chosen.
Available values:
:param option: Decides how :class:`~.Conv2d`and :func:'~.matmul' algorithms are chosen.
Available value Strategy
* HEURISTIC uses heuristic to choose the fastest algorithm.
* PROFILE runs possible algorithms on real device to find the best one.
* REPRODUCIBLE uses the algorithms that is reproducible.
* OPTMIZED uses the algorithms that is optimized.
The default strategy is HEURISTIC, this options can be combined to
form a combination option, e.g. PROFILE | REPRODUCIBLE
can combined a option that uses the fastest of profiling result that is also reproducible.
Available values string:
* 'HEURISTIC' uses heuristic to choose the fastest algorithm.
* 'PROFILE' runs possible algorithms on real device to find the best one.
......@@ -45,18 +58,29 @@ def set_execution_strategy(option: str):
It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'.
"""
valid_option = (
"HEURISTIC",
"PROFILE",
"PROFILE_HEURISTIC",
"PROFILE_REPRODUCIBLE",
"HEURISTIC_REPRODUCIBLE",
)
if not option in valid_option:
raise ValueError("Valid option can only be one of {}".format(valid_option))
valid_string_option = {
"REPRODUCIBLE": Strategy.REPRODUCIBLE,
"HEURISTIC": Strategy.HEURISTIC,
"PROFILE": Strategy.PROFILE,
}
global _execution_strategy # pylint: disable=global-statement
_execution_strategy = option
if isinstance(option, Strategy):
_execution_strategy = option
return
assert isinstance(option, str)
strategy_tmp = Strategy(0)
for opt in option.split("_"):
if not opt in valid_string_option:
raise ValueError(
"Valid option can only be one of {}, or combine them with '_'.".format(
valid_string_option.keys()
)
)
strategy_tmp = strategy_tmp | valid_string_option[opt]
_execution_strategy = strategy_tmp
@deprecated(version="1.3", reason="use get_execution_strategy() instead")
......
......@@ -19,6 +19,7 @@ import megengine.autodiff as ad
import megengine.functional as F
from megengine import jit
from megengine.core._trace_option import set_symbolic_shape
from megengine.core.ops import builtin
from megengine.core.tensor.utils import make_shape_tuple
from megengine.functional.debug_param import set_execution_strategy
from megengine.jit import SublinearMemoryConfig
......@@ -33,6 +34,8 @@ from megengine.module import (
from megengine.optimizer import SGD
from megengine.tensor import Tensor
Strategy = builtin.ops.Convolution.Strategy
def get_gpu_name():
try:
......@@ -242,7 +245,7 @@ def test_correctness():
else:
model_name = "mnist_model_with_test_cpu.mge"
model_path = os.path.join(os.path.dirname(__file__), model_name)
set_execution_strategy("HEURISTIC_REPRODUCIBLE")
set_execution_strategy(Strategy.HEURISTIC | Strategy.REPRODUCIBLE)
run_train(model_path, False, False, max_err=1e-5)
run_train(model_path, True, False, max_err=1e-5)
......
......@@ -337,6 +337,20 @@ static void gen_op_def_pybind11_single(raw_ostream &os, MgbOp& op, EnumContext&
className, attr->getEnumName(), i
));
}
if (attr->getEnumCombinedFlag()) {
//! define operator |
os << formatv(
"\n .def(\"__or__\", []({0}::{1} s0, {0}::{1} s1) {{ "
"\n return static_cast<{0}::{1}>(uint32_t(s0) | uint32_t(s1));"
"\n })",
className, attr->getEnumName());
//! define operator &
os << formatv(
"\n .def(\"__and__\", []({0}::{1} s0, {0}::{1} s1) {{"
"\n return static_cast<{0}::{1}>(uint32_t(s0) & uint32_t(s1));"
"\n })",
className, attr->getEnumName());
}
os << formatv(
"\n .def(py::init([](const std::string& in) {"
"\n auto&& str = normalize_enum(in);"
......
......@@ -77,6 +77,9 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase {
bool supportToString() const {
return getBaseRecord()->getValueAsBit("supportToString");
}
bool getEnumCombinedFlag() const {
return getBaseRecord()->getValueAsBit("enumCombined");
}
};
struct MgbHashableAttrMixin : public MgbAttrWrapperBase {
......
......@@ -142,8 +142,16 @@ R"__usage__(
#if MGB_ENABLE_FASTRUN
R"__usage__(
--fast-run
Enable fast-run mode. Operators with multiple algorithms would be profiled
on the real device with actual input shapes.
This param will be deperated later, please replace with param --full-profile.
--full-profile
Enable full-profile mode. Operators with multiple algorithms would be profiled
on the real device with actual input shapes, all algorithms will be profiled
include naive algorithms.
See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
--fast-profile
Enable fast-profile mode. Operators with multiple algorithms would be profiled
on the real device with actual input shapes, this mode will only profile the
well optimized algorithms to get the profile result fast.
See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
)__usage__"
#endif
......@@ -511,7 +519,8 @@ struct Args {
bool disable_assert_throw = false;
bool share_param_mem = false;
#if MGB_ENABLE_FASTRUN
bool use_fast_run = false;
bool use_full_profile = false;
bool use_fast_profile = false;
#endif
bool reproducible = false;
std::string fast_run_cache_path;
......@@ -695,18 +704,20 @@ void run_test_st(Args &env) {
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
S strategy = S::HEURISTIC;
#if MGB_ENABLE_FASTRUN
if (env.use_fast_run) {
if (env.use_full_profile) {
if (env.reproducible) {
strategy = S::PROFILE_REPRODUCIBLE;
strategy = S::PROFILE | S::REPRODUCIBLE;
} else {
strategy = S::PROFILE;
}
} else if (env.use_fast_profile) {
strategy = S::PROFILE | S::OPTMIZED;
} else if (env.reproducible) {
strategy = S::HEURISTIC_REPRODUCIBLE;
strategy = S::HEURISTIC | S::REPRODUCIBLE;
}
#else
if (env.reproducible) {
strategy = S::HEURISTIC_REPRODUCIBLE;
strategy = S::HEURISTIC | S::REPRODUCIBLE;
}
#endif
mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
......@@ -729,11 +740,12 @@ void run_test_st(Args &env) {
std::make_shared<InFilePersistentCache>(buf.get(), flen));
#if MGB_ENABLE_FASTRUN
} else {
mgb_assert(env.use_fast_run, "fast-run should be enabled");
mgb_assert(env.use_full_profile || env.use_fast_profile,
"fast-run or fast-profile should be enabled");
PersistentCache::set_impl(
std::make_shared<InFilePersistentCache>());
}
if (!env.use_fast_run)
if (!env.use_full_profile && !env.use_fast_profile)
#endif
mgb::gopt::enable_opr_use_profiling_cache_inplace(vars);
}
......@@ -1314,7 +1326,18 @@ Args Args::from_argv(int argc, char **argv) {
}
#if MGB_ENABLE_FASTRUN
if (!strcmp(argv[i], "--fast-run")) {
ret.use_fast_run = true;
mgb_log_warn(
"--fast-run param will be deperated later, please replace "
"with --full-profile or --fast-profile.");
ret.use_full_profile = true;
continue;
}
if (!strcmp(argv[i], "--full-profile")) {
ret.use_full_profile = true;
continue;
}
if (!strcmp(argv[i], "--fast-profile")) {
ret.use_fast_profile = true;
continue;
}
#endif
......
......@@ -188,7 +188,7 @@ AlgoChooserProfileCache::get(const Key &key) {
auto entry_len = read_uint32();
mgb_assert(buf + entry_len <= buf_end);
auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT,
&i.reproducible, &i.time, &i.workspace);
&i.attribute, &i.time, &i.workspace);
mgb_assert(nr == 3);
buf += entry_len;
}
......@@ -210,10 +210,10 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
auto &&cur = result[i];
if (prev.workspace <= cur.workspace &&
prev.reproducible == cur.reproducible) {
prev.attribute == cur.attribute) {
result.erase(result.begin() + i);
} else {
++ i;
++i;
}
}
......@@ -235,8 +235,8 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
write_uint32(0);
pos = val.size();
val.resize(pos + SPR_SIZE);
uint32_t nr = snprintf(&val[pos], SPR_SIZE,
ENTRY_FMT, i.reproducible, i.time, i.workspace);
uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute,
i.time, i.workspace);
//! for memory boundary failed, snprintf ret do not contain \0
nr += 1;
mgb_assert(nr < SPR_SIZE);
......
......@@ -12,6 +12,8 @@
#pragma once
#include "megbrain_build_config.h"
#include "megbrain/opr/param_defs.h"
#include "megdnn/basic_types.h"
#include <memory>
#include <string>
......@@ -242,6 +244,16 @@ inline constexpr std::size_t operator"" _z(unsigned long long n) {
return n;
}
#endif
#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)
} // namespace mgb
namespace megdnn {
namespace param {
MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy)
}
} // namespace megdnn
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -12,7 +12,6 @@
#pragma once
#include "megbrain/utils/hash.h"
#include "megbrain/utils/enum_class_bit.h"
#include "megbrain/utils/metahelper.h"
#include "megbrain/utils/thin/hash_table.h"
#include "megbrain/utils/thread.h"
......
......@@ -16,7 +16,6 @@
#include "megbrain/graph/symbol_var.h"
#include "megbrain/utils/hashable.h"
#include "megbrain/utils/enum_class_bit.h"
#include "megbrain/utils/thin/hash_table.h"
#include "megbrain/utils/small_vector.h"
......
......@@ -12,7 +12,6 @@
#pragma once
#include "megbrain/graph/bases.h"
#include "megbrain/utils/enum_class_bit.h"
#include "megbrain/utils/comp_node_sync_manager.h"
#include "megbrain/utils/small_vector.h"
#include "megbrain/utils/mempool.h"
......
......@@ -33,10 +33,11 @@ class MgbHashableAttrMixin {
string reprFunction = "std::to_string($0)";
}
class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit toString> {
class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit combined, bit toString> {
string parentNamespace = namespace;
string enumName = name;
list<string> enumMembers = members;
bit enumCombined = combined;
bit supportToString = toString;
}
......@@ -166,8 +167,8 @@ class MgbTupleAttr<list<MgbAttrWrapper> args>:
}
// -- enum types
class MgbEnumAttr<string namespace, string enumName, list<string> members, bit toString=0>:
HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, toString> {
class MgbEnumAttr<string namespace, string enumName, list<string> members, bit combined, bit toString=0>:
HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, combined, toString> {
let storageType = "::mlir::IntegerAttr";
let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0))";
......@@ -176,7 +177,7 @@ class MgbEnumAttr<string namespace, string enumName, list<string> members, bit t
}
class MgbEnumAliasAttr<string namespace, string enumName, MgbEnumAttr base>:
MgbEnumAttr<namespace, enumName, base.enumMembers>, MgbAliasAttrMixin<base>;
MgbEnumAttr<namespace, enumName, base.enumMembers, 0>, MgbAliasAttrMixin<base>;
// -- other types
def MgbDTypeAttr: HashableAttr<"::megdnn::DType"> {
......
/**
* \file src/core/include/megbrain/utils/enum_class_bit.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include <type_traits>
namespace mgb {
template<typename T>
class EnumClassBit {
std::underlying_type_t<T> m_val;
constexpr EnumClassBit(std::underlying_type_t<T> v):
m_val(v)
{
}
public:
constexpr EnumClassBit(T v):
m_val(static_cast<std::underlying_type_t<T>>(v))
{
}
constexpr operator T() const {
return static_cast<T>(m_val);
}
constexpr explicit operator bool() const {
return m_val;
}
#define DEF_OPR(op) \
constexpr EnumClassBit operator op (\
const EnumClassBit &rhs) const { \
return m_val op rhs.m_val; \
}
DEF_OPR(&)
DEF_OPR(|)
DEF_OPR(^)
constexpr EnumClassBit operator ~() const {
return ~m_val;
}
#undef DEF_OPR
};
}
#define _MGB_DECBO_SINGLE_OPR(cls, op) \
inline constexpr ::mgb::EnumClassBit<cls> operator op (cls x, cls y) { \
return ::mgb::EnumClassBit<cls>(x) op ::mgb::EnumClassBit<cls>(y); \
} \
inline constexpr ::mgb::EnumClassBit<cls> operator op ( \
::mgb::EnumClassBit<cls> x, cls y) { \
return x op ::mgb::EnumClassBit<cls>(y); \
}
#define _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, op) \
inline constexpr cls& operator op##= (cls& x, cls y) { \
x = x op ::mgb::EnumClassBit<cls>(y); \
return x; \
}
#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
_MGB_DECBO_SINGLE_OPR(cls, &) \
_MGB_DECBO_SINGLE_OPR(cls, |) \
_MGB_DECBO_SINGLE_OPR(cls, ^) \
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, &) \
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, |) \
_MGB_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \
inline constexpr ::mgb::EnumClassBit<cls> operator ~ (cls x) { \
return ~::mgb::EnumClassBit<cls>(x); \
} \
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -100,8 +100,7 @@ namespace mgb {
struct ResultEntry {
std::string algo; //! identifier of the algorithm
//! sscanf will up bool as int
int reproducible; //! whether algorithm is reproducible
uint32_t attribute; //! algo attribute, e.g. reproducible
double time; //! execution time in seconds
size_t workspace; //! workspace in bytes
};
......
......@@ -54,7 +54,6 @@ using namespace gopt;
namespace {
template <typename SharedDeviceTensor, typename MultipleDeviceTensorHolder>
void param_merge(OptState& opt_state) {
auto rewriter = opt_state.graph().make_rewriter();
......@@ -102,7 +101,7 @@ void param_merge(OptState& opt_state) {
rewriter.apply_inplace();
}
}
} // namespace
/* ================ global functions ================ */
......@@ -190,12 +189,10 @@ void gopt::enable_opr_algo_profiling_inplace(
void gopt::enable_opr_use_profiling_cache_inplace(
const VarNodeArrayView& dest_vars) {
modify_opr_algo_strategy_inplace(
dest_vars, opr::mixin::AlgoChooserHelper::ExecutionPolicy::
Strategy::PROFILE_HEURISTIC);
using S = megdnn::param::ExecutionPolicy::Strategy;
modify_opr_algo_strategy_inplace(dest_vars, S::PROFILE | S::HEURISTIC);
}
void gopt::set_opr_algo_workspace_limit_inplace(
const VarNodeArrayView& dest_vars, size_t workspace_limit) {
static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>
......
......@@ -1693,7 +1693,22 @@ TEST(TestGoptInference, ProfileCache) {
using S = opr::Convolution::ExecutionPolicy::Strategy;
ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
gopt::enable_opr_use_profiling_cache_inplace({z + 2.3f});
ASSERT_EQ(S::PROFILE_HEURISTIC, conv.execution_policy().strategy);
ASSERT_EQ(S::PROFILE | S::HEURISTIC, conv.execution_policy().strategy);
}
TEST(TestGoptInference, FastProfileCache) {
HostTensorGenerator<> gen;
auto graph = ComputingGraph::make();
auto host_x = gen({4, 3, 8, 9}), host_y = gen({2, 3, 3, 3});
auto x = opr::Host2DeviceCopy::make(*graph, host_x),
y = opr::Host2DeviceCopy::make(*graph, host_y),
z = opr::Convolution::make(x, y);
auto&& conv = z.node()->owner_opr()->cast_final_safe<opr::Convolution>();
using S = opr::Convolution::ExecutionPolicy::Strategy;
ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
gopt::modify_opr_algo_strategy_inplace({z + 2.3f},
S::PROFILE | S::OPTMIZED);
ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy);
}
TEST(TestGoptInference, AlgoWorkspaceLimit) {
......
......@@ -20,7 +20,6 @@
#include "megbrain/opr/dnn/lrn.h"
#include "megbrain/opr/dnn/fake_quant.h"
#include "megbrain/opr/dnn/tqt.h"
#include "megbrain/serialization/sereg.h"
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs/nn.h"
......
......@@ -284,8 +284,9 @@ namespace mgb {
namespace opr {
template <typename Opr>
void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
if (ctx.get_profile_result_from_cache(require_reproducible).valid())
void AlgoChooser<Opr>::profile(ExeContext& ctx,
ExecutionStrategy select_strategy) {
if (ctx.get_profile_result_from_cache(select_strategy).valid())
return;
AlgoChooserProfileCache::Result prof_rst;
......@@ -305,7 +306,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
algo.name.c_str(), str_on_inp_shape.c_str());
ImplExecutionPolicy policy;
policy.algo = algo.desc;
ctx.construct_execution_policy(require_reproducible, policy);
ctx.construct_execution_policy(select_strategy, policy);
if (ctx.get_workspace_size_bytes(policy) >= workspace_limit)
continue;
......@@ -354,7 +355,8 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
template <typename Opr>
typename AlgoChooser<Opr>::ImplExecutionPolicy
AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx,
ExecutionStrategy select_strategy,
bool enable_update) {
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile")))
if (ctx.owner_graph()->options().no_profiling_on_shape_change) {
......@@ -376,11 +378,11 @@ AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
_item.param, ctx.mgb_opr(), ctx.comp_node(),
ctx.execution_policy(), ctx.allow_weight_preprocess());
AlgoChooser<_Opr>::profile(sub_ctx, require_reproducible);
AlgoChooser<_Opr>::profile(sub_ctx, select_strategy);
});
}
typename AlgoChooser<Opr>::ImplExecutionPolicy policy;
ctx.construct_execution_policy(require_reproducible, policy);
ctx.construct_execution_policy(select_strategy, policy);
return policy;
MIDOUT_E
}
......@@ -402,11 +404,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
ImplExecutionPolicy policy;
if (auto algo_choose_hook = mgb_opr->algo_chooser()) {
policy = algo_choose_hook(mgb_opr);
ctx.construct_execution_policy(
mgb_opr->execution_policy().strategy ==
mixin::AlgoChooserHelper::ExecutionPolicy::Strategy::
HEURISTIC_REPRODUCIBLE,
policy, false);
ctx.construct_execution_policy((ExecutionStrategy::HEURISTIC |
ExecutionStrategy::REPRODUCIBLE),
policy, false);
}
if (!policy.algo.valid()) {
policy = get_policy(ctx);
......@@ -419,10 +419,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo);
mgb_assert(palgo, "Unknown algo description");
ret.append("): algo=" + std::string(palgo->name()));
ret.append(ssprintf(" workspace=%.2fMiB reproducible=%d",
ret.append(ssprintf(" workspace=%.2fMiB attirbute=%d",
workspace / (1024 * 1024.0),
palgo->contain_attribute(
megdnn::AlgoAttribute::REPRODUCIBLE)));
static_cast<uint32_t>(palgo->attribute())));
mgb_log_debug("%s", ret.c_str());
megdnn_opr->execution_policy() = policy;
......@@ -432,41 +431,39 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
template <typename Opr>
typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
ExeContext& ctx) {
using S = mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE);
switch (ctx.execution_policy().strategy) {
case S::HEURISTIC:
return ctx.choose_by_heuristic();
case S::HEURISTIC_REPRODUCIBLE:
return ctx.choose_by_heuristic(true);
case S::PROFILE_HEURISTIC: {
ImplExecutionPolicy policy = choose_by_profile(ctx, false, false);
if (!policy.algo.valid())
policy = ctx.choose_by_heuristic();
return policy;
}
auto opr_strategy = ctx.execution_policy().strategy;
if ((opr_strategy & ExecutionStrategy::HEURISTIC) &&
(opr_strategy & ExecutionStrategy::PROFILE)) {
ImplExecutionPolicy policy =
choose_by_profile(ctx, opr_strategy, false);
if (!policy.algo.valid())
policy = ctx.choose_by_heuristic(opr_strategy);
return policy;
} else if ((opr_strategy & ExecutionStrategy::HEURISTIC)) {
return ctx.choose_by_heuristic(opr_strategy);
}
#if MGB_ENABLE_FASTRUN
case S::PROFILE:
return choose_by_profile(ctx, false);
case S::PROFILE_REPRODUCIBLE:
return choose_by_profile(ctx, true);
else if (opr_strategy & ExecutionStrategy::PROFILE) {
return choose_by_profile(ctx, opr_strategy);
}
#endif
default:
mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
else {
mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
}
}
#define INST(Opr) \
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx); \
template void AlgoChooser<megdnn::Opr>::profile( \
ExeContext& ctx, bool require_reproducible); \
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::choose_by_profile( \
ExeContext& ctx, bool require_reproducible, bool enable_update); \
template size_t AlgoChooser<megdnn::Opr>::setup_algo( \
const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \
const MGBOpr* mgb_opr, bool allow_weight_preprocess); \
#define INST(Opr) \
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx); \
template void AlgoChooser<megdnn::Opr>::profile(ExeContext& ctx, \
ExecutionStrategy); \
template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::choose_by_profile( \
ExeContext& ctx, ExecutionStrategy, bool enable_update); \
template size_t AlgoChooser<megdnn::Opr>::setup_algo( \
const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \
const MGBOpr* mgb_opr, bool allow_weight_preprocess);
MGB_FOREACH_FASTRUN_OPR(INST)
......@@ -498,7 +495,7 @@ AlgoChooser<Opr>::ExeContext::ExeContext(
template <typename Opr>
typename AlgoChooser<Opr>::ImplAlgo
AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
bool require_reproducible) const {
ExecutionStrategy select_strategy) const {
MIDOUT_B(Opr,
midout_iv(MGB_HASH_STR(
"AlgoChooser::ExeContext::get_profile_result_from_cache")))
......@@ -522,7 +519,9 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
if (prof.empty())
return {};
for (auto&& i : prof) {
if ((!require_reproducible || i.reproducible)) {
if (!(select_strategy & ExecutionStrategy::REPRODUCIBLE) ||
static_cast<AlgoAttribute>(i.attribute) &
AlgoAttribute::REPRODUCIBLE) {
auto iter = algo_map.find(i.algo);
mgb_assert(iter != algo_map.end(),
"algorithm %s exists in "
......@@ -550,7 +549,8 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
template <typename Opr>
typename AlgoChooser<Opr>::ImplExecutionPolicy
AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
AlgoChooser<Opr>::ExeContext::choose_by_heuristic(
ExecutionStrategy select_strategy) const {
if (m_execution_policy.workspace_limit !=
std::numeric_limits<decltype(
m_execution_policy.workspace_limit)>::max()) {
......@@ -558,6 +558,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
"workspace_limit should not be setted if choose algo by "
"heuristic");
}
bool reproducible = static_cast<bool>(select_strategy &
ExecutionStrategy::REPRODUCIBLE);
auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
owner_graph(), m_cn, m_execution_policy.workspace_limit);
ImplExecutionPolicy policy;
......@@ -579,7 +581,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
_item.param, m_base_mgb_opr, m_cn, m_execution_policy,
m_allow_weight_preprocess);
policy.sub_policy.push_back(sub_ctx.choose_by_heuristic(reproducible));
policy.sub_policy.push_back(
sub_ctx.choose_by_heuristic(select_strategy));
});
return policy;
......@@ -588,9 +591,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
template <typename Opr>
std::vector<typename AlgoChooser<Opr>::ImplAlgo>
AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
auto heu = choose_by_heuristic();
auto&& ret =
APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC);
auto&& ret = APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
bool found = false;
for (size_t i = 0; i < ret.size(); ++i) {
if (ret[i].desc == heu.algo) {
......@@ -611,19 +613,21 @@ AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
template <typename Opr>
void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
bool require_reproducible,
ExecutionStrategy select_strategy,
typename AlgoChooser<Opr>::ImplExecutionPolicy& policy,
bool retrive_from_cache) const {
bool reproducible = static_cast<bool>(select_strategy &
ExecutionStrategy::REPRODUCIBLE);
if (!policy.algo.valid()) {
if (retrive_from_cache) {
policy.algo =
get_profile_result_from_cache(require_reproducible).desc;
get_profile_result_from_cache(select_strategy).desc;
} else {
auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
owner_graph(), m_cn, m_execution_policy.workspace_limit);
policy.algo = APPLY(m_megdnn_opr->get_algorithm_info_heuristic(
args..., workspace_limit,
require_reproducible),
reproducible),
m_layouts)
.desc;
}
......@@ -647,7 +651,7 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
_item.param, m_base_mgb_opr, m_cn, m_execution_policy,
m_allow_weight_preprocess);
policy.sub_policy.push_back({});
sub_ctx.construct_execution_policy(require_reproducible,
sub_ctx.construct_execution_policy(select_strategy,
policy.sub_policy.back(),
retrive_from_cache);
});
......@@ -718,8 +722,7 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(
return None;
return AlgoChooserProfileCache::ResultEntry{
palgo->name(),
palgo->contain_attribute(
megdnn::AlgoAttribute::REPRODUCIBLE),
static_cast<uint32_t>(palgo->attribute()),
rst.val().time, param.workspace};
}
......@@ -768,10 +771,10 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
bool allow_weight_preprocess); \
template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::ExeContext::choose_by_heuristic( \
bool reproducible) const; \
ExecutionStrategy select_strategy) const; \
template typename AlgoChooser<megdnn::Opr>::ImplAlgo \
AlgoChooser<megdnn::Opr>::ExeContext::get_profile_result_from_cache( \
bool require_reproducible) const; \
ExecutionStrategy select_strategy) const; \
template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo> \
AlgoChooser<megdnn::Opr>::ExeContext::get_all_candidates() const; \
template size_t \
......@@ -780,7 +783,7 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
policy) const; \
template void \
AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy( \
bool require_reproducible, \
ExecutionStrategy select_strategy, \
typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy, \
bool retrive_from_cache) const; \
template Maybe<AlgoChooserProfileCache::ResultEntry> \
......
......@@ -35,6 +35,13 @@ MGB_FOREACH_FASTRUN_OPR(cb)
#undef cb
namespace mgb {
//! define logical operation of megdnn::param::ExecutionPolicy::Strategy::Enum
//! and megdnn::detail::AlgoAttribute enum
using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy;
using AlgoAttribute = megdnn::AlgoAttribute;
namespace opr {
/* =================== AlgoChooser =================== */
......@@ -103,7 +110,7 @@ public:
const FixedTensorLayouts& layouts() const { return m_layouts; }
ImplExecutionPolicy choose_by_heuristic(
bool reproducible = false) const;
ExecutionStrategy select_strategy) const;
//! get all candidate algos, and the one choose_by_heuristic() is
//! put first
......@@ -126,19 +133,20 @@ public:
const ImplExecutionPolicy& policy, double& timeout) const;
//! get all profile algorithm from cache, return invalid if not exists
ImplAlgo get_profile_result_from_cache(bool require_reproducible) const;
ImplAlgo get_profile_result_from_cache(
ExecutionStrategy select_strategy) const;
/**
* \brief construct execution policy from cache or heuristic.
*
* \param require_reproducible select algo which is reproducible
* \param select_strategy select algo which matched this strategy
* \param policy execution policy
* \param retrive_from_cache retrive algo from cache if set True, get
* from heuristic otherwise.
*/
void construct_execution_policy(
bool require_reproducible, ImplExecutionPolicy& policy,
bool retrive_from_cache = true) const;
void construct_execution_policy(ExecutionStrategy select_strategy,
ImplExecutionPolicy& policy,
bool retrive_from_cache = true) const;
private:
Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const;
......@@ -153,11 +161,11 @@ private:
//! profile and save to cache
static void profile(ExeContext& ctx, bool require_reproducible);
static void profile(ExeContext& ctx, ExecutionStrategy select_strategy);
static ImplExecutionPolicy choose_by_profile(ExeContext& ctx,
bool require_reproducible,
bool enable_update = true);
static ImplExecutionPolicy choose_by_profile(
ExeContext& ctx, ExecutionStrategy select_strategy,
bool enable_update = true);
public:
/*!
......
......@@ -13,7 +13,6 @@
#pragma once
#include "megbrain/graph/operator_node.h"
#include "megbrain/opr/param_defs.h"
#include "megdnn/oprs/base.h"
#include "megdnn/oprs/nn.h"
......@@ -73,7 +72,6 @@ protected:
};
} // namespace mixin
} // namespace opr
} // namespace mgb
......
......@@ -429,10 +429,11 @@ TEST(TestOprDNN, MatrixMulExePolicy) {
auto cn = CompNode::load("cpux");
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy: {S:HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
auto graph = ComputingGraph::make();
......
......@@ -355,11 +355,13 @@ TEST(TestOprDNN, ConvBiasExePolicy) {
auto cn = CompNode::load("cpux");
#if MGB_ENABLE_FASTRUN
for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
auto graph = ComputingGraph::make();
HostTensorGenerator<> gen;
......@@ -397,7 +399,8 @@ TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) {
auto cn = CompNode::load("cpux");
for (auto strategy: {S::PROFILE, S::PROFILE_REPRODUCIBLE}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) {
auto graph = ComputingGraph::make();
HostTensorGenerator<> gen;
......@@ -439,10 +442,12 @@ TEST(TestOprDNN, ConvolutionExePolicy) {
PersistentCacheHook cache_hook{on_get};
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
using Checker = AutoOprChecker<2, 1>;
......@@ -522,10 +527,11 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) {
PersistentCacheHook cache_hook{on_get};
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
{S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE),
S(S::PROFILE | S::HEURISTIC)}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy: {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
#endif
using Checker = AutoOprChecker<2, 1>;
......@@ -1183,9 +1189,12 @@ TEST(TestOprDNN, Convolution3DExePolicy) {
using S = Policy::Strategy;
#if MGB_ENABLE_FASTRUN
for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
using Checker = AutoOprChecker<2, 1>;
......@@ -1660,10 +1669,12 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) {
PersistentCacheHook cache_hook{on_get};
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
#else
for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
auto make_graph = [&](const Checker::SymInpArray& inputs)
-> Checker::SymOutArray {
......@@ -1769,10 +1780,12 @@ TEST(TestOprDNN, DeformableConvForward) {
Param param;
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
#else
for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
auto make_graph = [&](const Checker::SymInpArray& inputs)
-> Checker::SymOutArray {
......@@ -1936,10 +1949,12 @@ TEST(TestOprDNN, BatchConvBiasForward) {
param.sparse = Param::Sparse::DENSE;
#if MGB_ENABLE_FASTRUN
for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
#else
for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) {
for (auto strategy :
SmallVector<S>{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) {
#endif
auto make_quantized = [&](SymbolVar x, const DType& dtype) {
......@@ -2080,7 +2095,8 @@ TEST(TestOprDNN, HeuristicReproducible) {
constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1;
for (auto strategy : {S::HEURISTIC, S::HEURISTIC_REPRODUCIBLE}) {
for (auto strategy :
SmallVector<S>{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) {
VarNode* bwd_flt;
auto make_graph = [&](const Checker::SymInpArray& inputs)
-> Checker::SymOutArray {
......@@ -2126,7 +2142,7 @@ TEST(TestOprDNN, HeuristicReproducible) {
megdnn::Algorithm* palgo =
megdnn_opr->get_algorithm_from_desc(algo);
mgb_assert(palgo, "Unknown algo description");
if (strategy == S::HEURISTIC_REPRODUCIBLE) {
if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) {
EXPECT_TRUE(palgo->contain_attribute(
megdnn::AlgoAttribute::REPRODUCIBLE));
}
......
......@@ -43,6 +43,7 @@ namespace megdnn {
std::ostream &ostr, const DType &dt) {
return ostr << dt.name();
}
} // namespace megdnn
namespace mgb {
......
......@@ -18,7 +18,7 @@ pdef('PersistentOutputStorage').add_fields(
add_const('int32', 'INVALID_AXIS', 'MAX_NDIM').
add_fields('int32', 'axis', 'INVALID_AXIS'))
(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator').
(pdef('ExecutionPolicy', version=0, is_legacy=True).
add_enum('Strategy',
Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
......@@ -33,6 +33,20 @@ pdef('PersistentOutputStorage').add_fields(
Doc('workspace_limit', 'workspace limit in bytes'),
str(2**64-1)+'ull'))
(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator', version=1).
add_bit_combination_enum('Strategy',
Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
Doc('PROFILE',
'run possible algorithms on real device to find the best'),
Doc('REPRODUCIBLE',
'when profile or heuristic algo selection it require the algos'
'must be reproducible'),
Doc('OPTMIZED',
'profile require algos are optmized to achieve fast-profile')).
add_fields('uint64',
Doc('workspace_limit', 'workspace limit in bytes'),
str(2**64-1)+'ull'))
(pdef('AssertEqual').
add_fields('float32',
Doc('maxerr', 'max allowed error; error is defined as the minimal '
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册