diff --git a/dnn/include/megdnn/basic_types.h b/dnn/include/megdnn/basic_types.h index 61f580a5f2f7173d041b6872b0d6990643d53a13..c2902613f6443a684ff15a3ef0657b9528f3038e 100644 --- a/dnn/include/megdnn/basic_types.h +++ b/dnn/include/megdnn/basic_types.h @@ -506,10 +506,66 @@ struct DynOutMallocPolicyCall { } }; + +template +class EnumClassBit { + std::underlying_type_t m_val; + + constexpr EnumClassBit(std::underlying_type_t v) : m_val(v) {} + +public: + constexpr EnumClassBit(T v) + : m_val(static_cast>(v)) {} + + constexpr operator T() const { return static_cast(m_val); } + + constexpr explicit operator bool() const { return m_val; } + +#define DEF_OPR(op) \ + constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \ + return m_val op rhs.m_val; \ + } + + DEF_OPR(&) + DEF_OPR(|) + DEF_OPR (^) + + constexpr EnumClassBit operator~() const { return ~m_val; } + +#undef DEF_OPR +}; + #endif // MEGDNN_CC_HOST } // namespace megdnn +#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \ + inline constexpr ::megdnn::EnumClassBit operator op(cls x, cls y) { \ + return ::megdnn::EnumClassBit(x) \ + op ::megdnn::EnumClassBit(y); \ + } \ + inline constexpr ::megdnn::EnumClassBit operator op( \ + ::megdnn::EnumClassBit x, cls y) { \ + return x op ::megdnn::EnumClassBit(y); \ + } + +#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ + inline constexpr cls& operator op##=(cls& x, cls y) { \ + x = x op ::megdnn::EnumClassBit(y); \ + return x; \ + } + +#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \ + _MEGDNN_DECBO_SINGLE_OPR(cls, &) \ + _MEGDNN_DECBO_SINGLE_OPR(cls, |) \ + _MEGDNN_DECBO_SINGLE_OPR(cls, ^) \ + _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ + _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ + _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ + inline constexpr ::megdnn::EnumClassBit operator~(cls x) { \ + return ~::megdnn::EnumClassBit(x); \ + } + #include "megdnn/internal/visibility_epilogue.h" // vim: syntax=cpp.doxygen diff --git a/dnn/include/megdnn/oprs/base.h 
b/dnn/include/megdnn/oprs/base.h index 1fedcd5d61715845c695ff34d1fb1f55f8c4c65c..d111a0c2adcb320482b63093b3be0d69b42154df 100644 --- a/dnn/include/megdnn/oprs/base.h +++ b/dnn/include/megdnn/oprs/base.h @@ -251,6 +251,8 @@ protected: Handle::HandleType m_handle_type = Handle::HandleType::NAIVE; }; +MEGDNN_DEF_ENUM_CLASS_BIT_OPR(Algorithm::Attribute) + //! policy for executing the operator struct ExecutionPolicy { //! INVALID_ALGO_TYPE algo_type means using heuristic diff --git a/dnn/scripts/gen_flatbuffers_schema.py b/dnn/scripts/gen_flatbuffers_schema.py index f66040f4400f91cc1d82fdd24de626620dd41fd0..3c43561b4f80a1902b539a17219764c01036fba6 100755 --- a/dnn/scripts/gen_flatbuffers_schema.py +++ b/dnn/scripts/gen_flatbuffers_schema.py @@ -53,9 +53,13 @@ class FlatBuffersWriter(IndentWriterBase): e = self._enums[(p, e)] self._write_doc(e.name) self._write("enum %s%s : uint {", p, e.name, indent=1) - for member in e.members: + for idx, member in enumerate(e.members): self._write_doc(member) - self._write("%s,", scramble_enum_member_name(str(member))) + if e.combined: + self._write("%s=%d,", scramble_enum_member_name(str(member)), + 1< enum""" def __init__(self, param_name, name, name_field, members, default, - member_alias): + member_alias, combined = False): name = member_defs.Doc.make(name) assert name.id[0].isupper() members = tuple(map(member_defs.Doc.make, members)) @@ -97,6 +97,7 @@ class member_defs: default = name_field.index(default) assert isinstance(default, int) self.name = name + self.combined = combined self.name_field = self.get_name_field(name.id, name_field) self.members = members self.default = default @@ -197,6 +198,12 @@ class ParamDef: self.name.id, name, name_field, members, default, member_alias)) return self + def add_bit_combination_enum(self, name, *members, default=0, + name_field=None, member_alias=[]): + self.members.append(member_defs.Enum( + self.name.id, name, name_field, members, default, member_alias, True)) + return self + def 
add_enum_alias(self, name, src_class, src_name=None, name_field=None, default=None): self.members.append(member_defs.EnumAlias( @@ -463,8 +470,12 @@ class SerializedDType(_ParamDefBase): for idx, emem in enumerate(e.members): self._write('%s = "%s"', emem, emem) self._write_doc(emem) - self._enum_member2num.append('id({}.{}):{}'.format( - qualname, emem, idx)) + if e.combined: + self._enum_member2num.append('id({}.{}):{}'.format( + qualname, emem, 1<::type>( \ diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h index 3af0ccdf87f5a54729555b72030cb4e936d8e5e2..a0309aedbe481d11efc4c7adc899c6e89ee0b03b 100644 --- a/dnn/src/common/utils.h +++ b/dnn/src/common/utils.h @@ -692,61 +692,6 @@ inline void* get_origin_ptr(const TensorND* tensor, void* ptr) { tensor->layout.span().low_byte); } -template -class EnumClassBit { - std::underlying_type_t m_val; - - constexpr EnumClassBit(std::underlying_type_t v) : m_val(v) {} - -public: - constexpr EnumClassBit(T v) - : m_val(static_cast>(v)) {} - - constexpr operator T() const { return static_cast(m_val); } - - constexpr explicit operator bool() const { return m_val; } - -#define DEF_OPR(op) \ - constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \ - return m_val op rhs.m_val; \ - } - - DEF_OPR(&) - DEF_OPR(|) - DEF_OPR (^) - - constexpr EnumClassBit operator~() const { return ~m_val; } - -#undef DEF_OPR -}; - -#define _MEGDNN_DECBO_SINGLE_OPR(cls, op) \ - inline constexpr ::megdnn::EnumClassBit operator op(cls x, cls y) { \ - return ::megdnn::EnumClassBit(x) \ - op ::megdnn::EnumClassBit(y); \ - } \ - inline constexpr ::megdnn::EnumClassBit operator op( \ - ::megdnn::EnumClassBit x, cls y) { \ - return x op ::megdnn::EnumClassBit(y); \ - } - -#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ - inline constexpr cls& operator op##=(cls& x, cls y) { \ - x = x op ::megdnn::EnumClassBit(y); \ - return x; \ - } - -#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) \ - _MEGDNN_DECBO_SINGLE_OPR(cls, &) \ - 
_MEGDNN_DECBO_SINGLE_OPR(cls, |) \ - _MEGDNN_DECBO_SINGLE_OPR(cls, ^) \ - _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ - _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ - _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ - inline constexpr ::megdnn::EnumClassBit operator~(cls x) { \ - return ~::megdnn::EnumClassBit(x); \ - } - } // namespace megdnn // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.h b/dnn/src/cuda/convolution3d/backward_filter/algo.h index 7e9f84e96c9f595fccb959f0abff67d246da7401..844eb815047cb4dfddf631ce4a7857861ba1a627 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/algo.h +++ b/dnn/src/cuda/convolution3d/backward_filter/algo.h @@ -218,4 +218,3 @@ public: } // namespace megdnn // vim: syntax=cpp.doxygen - diff --git a/imperative/python/megengine/functional/debug_param.py b/imperative/python/megengine/functional/debug_param.py index a257938d1a60b5d4d65ab9e9757b511de27b3999..08bace859c4e3377ad7c6e17645f9de018984f65 100644 --- a/imperative/python/megengine/functional/debug_param.py +++ b/imperative/python/megengine/functional/debug_param.py @@ -8,9 +8,12 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import os +from ..core.ops import builtin from ..logger import get_logger from ..utils.deprecation import deprecated +Strategy = builtin.ops.Convolution.Strategy + _execution_strategy = os.getenv("MEGENGINE_EXECUTION_STRATEGY", "HEURISTIC") if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: @@ -19,7 +22,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None: ) -def get_execution_strategy() -> str: +def get_execution_strategy() -> Strategy: """ Returns the execution strategy of :class:`~.Conv2d` and :func:'~.matmul' @@ -28,12 +31,22 @@ def get_execution_strategy() -> str: return _execution_strategy -def set_execution_strategy(option: str): +def set_execution_strategy(option): """ Sets the execution strategy of :class:`~.Conv2d` and :func:'~.matmul' - :param option: Decides how :class:`~.Conv2d` and :func:'~.matmul' algorithms are chosen. - Available values: + :param option: Decides how :class:`~.Conv2d`and :func:'~.matmul' algorithms are chosen. + Available value Strategy + * HEURISTIC uses heuristic to choose the fastest algorithm. + * PROFILE runs possible algorithms on real device to find the best one. + * REPRODUCIBLE uses the algorithms that is reproducible. + * OPTMIZED uses the algorithms that is optimized. + + The default strategy is HEURISTIC, this options can be combined to + form a combination option, e.g. PROFILE | REPRODUCIBLE + can combined a option that uses the fastest of profiling result that is also reproducible. + + Available values string: * 'HEURISTIC' uses heuristic to choose the fastest algorithm. * 'PROFILE' runs possible algorithms on real device to find the best one. @@ -45,18 +58,29 @@ def set_execution_strategy(option: str): It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'. 
""" - valid_option = ( - "HEURISTIC", - "PROFILE", - "PROFILE_HEURISTIC", - "PROFILE_REPRODUCIBLE", - "HEURISTIC_REPRODUCIBLE", - ) - if not option in valid_option: - raise ValueError("Valid option can only be one of {}".format(valid_option)) + valid_string_option = { + "REPRODUCIBLE": Strategy.REPRODUCIBLE, + "HEURISTIC": Strategy.HEURISTIC, + "PROFILE": Strategy.PROFILE, + } global _execution_strategy # pylint: disable=global-statement - _execution_strategy = option + if isinstance(option, Strategy): + _execution_strategy = option + return + + assert isinstance(option, str) + + strategy_tmp = Strategy(0) + for opt in option.split("_"): + if not opt in valid_string_option: + raise ValueError( + "Valid option can only be one of {}, or combine them with '_'.".format( + valid_string_option.keys() + ) + ) + strategy_tmp = strategy_tmp | valid_string_option[opt] + _execution_strategy = strategy_tmp @deprecated(version="1.3", reason="use get_execution_strategy() instead") diff --git a/imperative/python/test/integration/test_correctness_mnistnet.py b/imperative/python/test/integration/test_correctness_mnistnet.py index a88075a23870a1c9fe478d31eecd8dc1a933d722..ce33b54161b216322d06f656b82f0bd2e8a48e07 100644 --- a/imperative/python/test/integration/test_correctness_mnistnet.py +++ b/imperative/python/test/integration/test_correctness_mnistnet.py @@ -19,6 +19,7 @@ import megengine.autodiff as ad import megengine.functional as F from megengine import jit from megengine.core._trace_option import set_symbolic_shape +from megengine.core.ops import builtin from megengine.core.tensor.utils import make_shape_tuple from megengine.functional.debug_param import set_execution_strategy from megengine.jit import SublinearMemoryConfig @@ -33,6 +34,8 @@ from megengine.module import ( from megengine.optimizer import SGD from megengine.tensor import Tensor +Strategy = builtin.ops.Convolution.Strategy + def get_gpu_name(): try: @@ -242,7 +245,7 @@ def test_correctness(): else: model_name = 
"mnist_model_with_test_cpu.mge" model_path = os.path.join(os.path.dirname(__file__), model_name) - set_execution_strategy("HEURISTIC_REPRODUCIBLE") + set_execution_strategy(Strategy.HEURISTIC | Strategy.REPRODUCIBLE) run_train(model_path, False, False, max_err=1e-5) run_train(model_path, True, False, max_err=1e-5) diff --git a/imperative/tablegen/autogen.cpp b/imperative/tablegen/autogen.cpp index 5e2b137a1c7050e0487783a49d08b4bcccd4b260..1e00f8f3d7901610d5519065a59f08897c3d12da 100644 --- a/imperative/tablegen/autogen.cpp +++ b/imperative/tablegen/autogen.cpp @@ -337,6 +337,20 @@ static void gen_op_def_pybind11_single(raw_ostream &os, MgbOp& op, EnumContext& className, attr->getEnumName(), i )); } + if (attr->getEnumCombinedFlag()) { + //! define operator | + os << formatv( + "\n .def(\"__or__\", []({0}::{1} s0, {0}::{1} s1) {{ " + "\n return static_cast<{0}::{1}>(uint32_t(s0) | uint32_t(s1));" + "\n })", + className, attr->getEnumName()); + //! define operator & + os << formatv( + "\n .def(\"__and__\", []({0}::{1} s0, {0}::{1} s1) {{" + "\n return static_cast<{0}::{1}>(uint32_t(s0) & uint32_t(s1));" + "\n })", + className, attr->getEnumName()); + } os << formatv( "\n .def(py::init([](const std::string& in) {" "\n auto&& str = normalize_enum(in);" diff --git a/imperative/tablegen/helper.h b/imperative/tablegen/helper.h index c5f084e4aa1426dec60df91d7dbbe815c6a9e342..c0fa56fb07fe77f8cee995349343868eb3a7e0ff 100644 --- a/imperative/tablegen/helper.h +++ b/imperative/tablegen/helper.h @@ -77,6 +77,9 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase { bool supportToString() const { return getBaseRecord()->getValueAsBit("supportToString"); } + bool getEnumCombinedFlag() const { + return getBaseRecord()->getValueAsBit("enumCombined"); + } }; struct MgbHashableAttrMixin : public MgbAttrWrapperBase { diff --git a/sdk/load-and-run/src/mgblar.cpp b/sdk/load-and-run/src/mgblar.cpp index fa131a7e55a7f7609d371b019bb51c1032770634..f25a0e37497e8c6a728210abd84b7e4ed15c0a69 
100644 --- a/sdk/load-and-run/src/mgblar.cpp +++ b/sdk/load-and-run/src/mgblar.cpp @@ -142,8 +142,16 @@ R"__usage__( #if MGB_ENABLE_FASTRUN R"__usage__( --fast-run - Enable fast-run mode. Operators with multiple algorithms would be profiled - on the real device with actual input shapes. + This param will be deperated later, please replace with param --full-profile. + --full-profile + Enable full-profile mode. Operators with multiple algorithms would be profiled + on the real device with actual input shapes, all algorithms will be profiled + include naive algorithms. + See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details. + --fast-profile + Enable fast-profile mode. Operators with multiple algorithms would be profiled + on the real device with actual input shapes, this mode will only profile the + well optimized algorithms to get the profile result fast. See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details. )__usage__" #endif @@ -511,7 +519,8 @@ struct Args { bool disable_assert_throw = false; bool share_param_mem = false; #if MGB_ENABLE_FASTRUN - bool use_fast_run = false; + bool use_full_profile = false; + bool use_fast_profile = false; #endif bool reproducible = false; std::string fast_run_cache_path; @@ -695,18 +704,20 @@ void run_test_st(Args &env) { using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy; S strategy = S::HEURISTIC; #if MGB_ENABLE_FASTRUN - if (env.use_fast_run) { + if (env.use_full_profile) { if (env.reproducible) { - strategy = S::PROFILE_REPRODUCIBLE; + strategy = S::PROFILE | S::REPRODUCIBLE; } else { strategy = S::PROFILE; } + } else if (env.use_fast_profile) { + strategy = S::PROFILE | S::OPTMIZED; } else if (env.reproducible) { - strategy = S::HEURISTIC_REPRODUCIBLE; + strategy = S::HEURISTIC | S::REPRODUCIBLE; } #else if (env.reproducible) { - strategy = S::HEURISTIC_REPRODUCIBLE; + strategy = S::HEURISTIC | S::REPRODUCIBLE; } #endif mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy); @@ 
-729,11 +740,12 @@ void run_test_st(Args &env) { std::make_shared(buf.get(), flen)); #if MGB_ENABLE_FASTRUN } else { - mgb_assert(env.use_fast_run, "fast-run should be enabled"); + mgb_assert(env.use_full_profile || env.use_fast_profile, + "fast-run or fast-profile should be enabled"); PersistentCache::set_impl( std::make_shared()); } - if (!env.use_fast_run) + if (!env.use_full_profile && !env.use_fast_profile) #endif mgb::gopt::enable_opr_use_profiling_cache_inplace(vars); } @@ -1314,7 +1326,18 @@ Args Args::from_argv(int argc, char **argv) { } #if MGB_ENABLE_FASTRUN if (!strcmp(argv[i], "--fast-run")) { - ret.use_fast_run = true; + mgb_log_warn( + "--fast-run param will be deperated later, please replace " + "with --full-profile or --fast-profile."); + ret.use_full_profile = true; + continue; + } + if (!strcmp(argv[i], "--full-profile")) { + ret.use_full_profile = true; + continue; + } + if (!strcmp(argv[i], "--fast-profile")) { + ret.use_fast_profile = true; continue; } #endif diff --git a/src/core/impl/utils/persistent_cache.cpp b/src/core/impl/utils/persistent_cache.cpp index 8dbbd3c180f8f24569edaa522dd00501e051ae50..19cbf4c1f6f2aeb57ee80ee21d967e5ac08ccf7f 100644 --- a/src/core/impl/utils/persistent_cache.cpp +++ b/src/core/impl/utils/persistent_cache.cpp @@ -188,7 +188,7 @@ AlgoChooserProfileCache::get(const Key &key) { auto entry_len = read_uint32(); mgb_assert(buf + entry_len <= buf_end); auto nr = sscanf(reinterpret_cast(buf), ENTRY_FMT, - &i.reproducible, &i.time, &i.workspace); + &i.attribute, &i.time, &i.workspace); mgb_assert(nr == 3); buf += entry_len; } @@ -210,10 +210,10 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) { auto &&cur = result[i]; if (prev.workspace <= cur.workspace && - prev.reproducible == cur.reproducible) { + prev.attribute == cur.attribute) { result.erase(result.begin() + i); } else { - ++ i; + ++i; } } @@ -235,8 +235,8 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) { write_uint32(0); 
pos = val.size(); val.resize(pos + SPR_SIZE); - uint32_t nr = snprintf(&val[pos], SPR_SIZE, - ENTRY_FMT, i.reproducible, i.time, i.workspace); + uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute, + i.time, i.workspace); //! for memory boundary failed, snprintf ret do not contain \0 nr += 1; mgb_assert(nr < SPR_SIZE); diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h index c90b461ff6e2bf0c7e1ba092df00f1d91e869935..085ff414481db259a15034f02a69af4120810e04 100644 --- a/src/core/include/megbrain/common.h +++ b/src/core/include/megbrain/common.h @@ -12,6 +12,8 @@ #pragma once #include "megbrain_build_config.h" +#include "megbrain/opr/param_defs.h" +#include "megdnn/basic_types.h" #include #include @@ -242,6 +244,16 @@ inline constexpr std::size_t operator"" _z(unsigned long long n) { return n; } #endif + +#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \ + MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls) + } // namespace mgb +namespace megdnn { +namespace param { +MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy) +} +} // namespace megdnn + // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h index 2ff854495db00056a2ce01160f4136f575660595..265f8e67746903dba272df151a57d2da5c338ec4 100644 --- a/src/core/include/megbrain/comp_node.h +++ b/src/core/include/megbrain/comp_node.h @@ -12,7 +12,6 @@ #pragma once #include "megbrain/utils/hash.h" -#include "megbrain/utils/enum_class_bit.h" #include "megbrain/utils/metahelper.h" #include "megbrain/utils/thin/hash_table.h" #include "megbrain/utils/thread.h" diff --git a/src/core/include/megbrain/graph/operator_node.h b/src/core/include/megbrain/graph/operator_node.h index a8ea098051d9762ad774b2dffa51974b1e36c539..27c597416dd6270e28b4577c64fe1635e761d264 100644 --- a/src/core/include/megbrain/graph/operator_node.h +++ b/src/core/include/megbrain/graph/operator_node.h @@ -16,7 +16,6 @@ #include 
"megbrain/graph/symbol_var.h" #include "megbrain/utils/hashable.h" -#include "megbrain/utils/enum_class_bit.h" #include "megbrain/utils/thin/hash_table.h" #include "megbrain/utils/small_vector.h" diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h index e10319591fe0c8d3cb5463f3636fd208aef84c38..0b61902343173e8b25252cb2950a80394562e0d4 100644 --- a/src/core/include/megbrain/graph/var_node.h +++ b/src/core/include/megbrain/graph/var_node.h @@ -12,7 +12,6 @@ #pragma once #include "megbrain/graph/bases.h" -#include "megbrain/utils/enum_class_bit.h" #include "megbrain/utils/comp_node_sync_manager.h" #include "megbrain/utils/small_vector.h" #include "megbrain/utils/mempool.h" diff --git a/src/core/include/megbrain/ir/base.td b/src/core/include/megbrain/ir/base.td index d1f35ebc61ca85721f803302314b2c65fec8080b..ee6b75fc66e4feeec44986ee2386c684c894d7dc 100644 --- a/src/core/include/megbrain/ir/base.td +++ b/src/core/include/megbrain/ir/base.td @@ -33,10 +33,11 @@ class MgbHashableAttrMixin { string reprFunction = "std::to_string($0)"; } -class MgbEnumAttrMixin members, bit toString> { +class MgbEnumAttrMixin members, bit combined, bit toString> { string parentNamespace = namespace; string enumName = name; list enumMembers = members; + bit enumCombined = combined; bit supportToString = toString; } @@ -166,8 +167,8 @@ class MgbTupleAttr args>: } // -- enum types -class MgbEnumAttr members, bit toString=0>: - HashableAttr, MgbEnumAttrMixin { +class MgbEnumAttr members, bit combined, bit toString=0>: + HashableAttr, MgbEnumAttrMixin { let storageType = "::mlir::IntegerAttr"; let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())"; let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast($0))"; @@ -176,7 +177,7 @@ class MgbEnumAttr members, bit t } class MgbEnumAliasAttr: - MgbEnumAttr, MgbAliasAttrMixin; + MgbEnumAttr, MgbAliasAttrMixin; // -- other types def MgbDTypeAttr: 
HashableAttr<"::megdnn::DType"> { diff --git a/src/core/include/megbrain/utils/enum_class_bit.h b/src/core/include/megbrain/utils/enum_class_bit.h deleted file mode 100644 index 35e50ccfa2edfd2894d15859bed56037a136a2e7..0000000000000000000000000000000000000000 --- a/src/core/include/megbrain/utils/enum_class_bit.h +++ /dev/null @@ -1,89 +0,0 @@ -/** - * \file src/core/include/megbrain/utils/enum_class_bit.h - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - */ - -#pragma once - -#include - -namespace mgb { - template - class EnumClassBit { - std::underlying_type_t m_val; - - constexpr EnumClassBit(std::underlying_type_t v): - m_val(v) - { - } - - public: - constexpr EnumClassBit(T v): - m_val(static_cast>(v)) - { - } - - constexpr operator T() const { - return static_cast(m_val); - } - - constexpr explicit operator bool() const { - return m_val; - } - -#define DEF_OPR(op) \ - constexpr EnumClassBit operator op (\ - const EnumClassBit &rhs) const { \ - return m_val op rhs.m_val; \ - } - - DEF_OPR(&) - DEF_OPR(|) - DEF_OPR(^) - - constexpr EnumClassBit operator ~() const { - return ~m_val; - } - - -#undef DEF_OPR - }; - -} - -#define _MGB_DECBO_SINGLE_OPR(cls, op) \ - inline constexpr ::mgb::EnumClassBit operator op (cls x, cls y) { \ - return ::mgb::EnumClassBit(x) op ::mgb::EnumClassBit(y); \ - } \ - inline constexpr ::mgb::EnumClassBit operator op ( \ - ::mgb::EnumClassBit x, cls y) { \ - return x op ::mgb::EnumClassBit(y); \ - } - -#define _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, op) \ - inline constexpr cls& operator op##= (cls& x, cls y) { \ - x = x op ::mgb::EnumClassBit(y); \ - return x; \ - } - -#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \ - 
_MGB_DECBO_SINGLE_OPR(cls, &) \ - _MGB_DECBO_SINGLE_OPR(cls, |) \ - _MGB_DECBO_SINGLE_OPR(cls, ^) \ - _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, &) \ - _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, |) \ - _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \ - inline constexpr ::mgb::EnumClassBit operator ~ (cls x) { \ - return ~::mgb::EnumClassBit(x); \ - } \ - - - -// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} - diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h index 8c2777a9bf273936c3c02446b49aefb26507221d..ef84fa7022f135041873cccf653b7a40319ab6bc 100644 --- a/src/core/include/megbrain/utils/persistent_cache.h +++ b/src/core/include/megbrain/utils/persistent_cache.h @@ -100,8 +100,7 @@ namespace mgb { struct ResultEntry { std::string algo; //! identifier of the algorithm - //! sscanf will up bool as int - int reproducible; //! whether algorithm is reproducible + uint32_t attribute; //! algo attribute, e.g. reproducible double time; //! execution time in seconds size_t workspace; //! 
workspace in bytes }; diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp index 2e4ddcf8cfad03c18fc677416cc72b4732b06d49..3b4a86087f5b525a74afbb28330f231fb97784b6 100644 --- a/src/gopt/impl/inference.cpp +++ b/src/gopt/impl/inference.cpp @@ -54,7 +54,6 @@ using namespace gopt; namespace { - template void param_merge(OptState& opt_state) { auto rewriter = opt_state.graph().make_rewriter(); @@ -102,7 +101,7 @@ void param_merge(OptState& opt_state) { rewriter.apply_inplace(); } -} +} // namespace /* ================ global functions ================ */ @@ -190,12 +189,10 @@ void gopt::enable_opr_algo_profiling_inplace( void gopt::enable_opr_use_profiling_cache_inplace( const VarNodeArrayView& dest_vars) { - modify_opr_algo_strategy_inplace( - dest_vars, opr::mixin::AlgoChooserHelper::ExecutionPolicy:: - Strategy::PROFILE_HEURISTIC); + using S = megdnn::param::ExecutionPolicy::Strategy; + modify_opr_algo_strategy_inplace(dest_vars, S::PROFILE | S::HEURISTIC); } - void gopt::set_opr_algo_workspace_limit_inplace( const VarNodeArrayView& dest_vars, size_t workspace_limit) { static const ThinHashMap diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp index 4ef7808e72fdefb5498d9937ed8dbc1e5446b9c9..36d498307009bb4682a5d92ccf726ca35c2f3c99 100644 --- a/src/gopt/test/inference.cpp +++ b/src/gopt/test/inference.cpp @@ -1693,7 +1693,22 @@ TEST(TestGoptInference, ProfileCache) { using S = opr::Convolution::ExecutionPolicy::Strategy; ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); gopt::enable_opr_use_profiling_cache_inplace({z + 2.3f}); - ASSERT_EQ(S::PROFILE_HEURISTIC, conv.execution_policy().strategy); + ASSERT_EQ(S::PROFILE | S::HEURISTIC, conv.execution_policy().strategy); +} + +TEST(TestGoptInference, FastProfileCache) { + HostTensorGenerator<> gen; + auto graph = ComputingGraph::make(); + auto host_x = gen({4, 3, 8, 9}), host_y = gen({2, 3, 3, 3}); + auto x = opr::Host2DeviceCopy::make(*graph, host_x), + y = 
opr::Host2DeviceCopy::make(*graph, host_y), + z = opr::Convolution::make(x, y); + auto&& conv = z.node()->owner_opr()->cast_final_safe(); + using S = opr::Convolution::ExecutionPolicy::Strategy; + ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy); + gopt::modify_opr_algo_strategy_inplace({z + 2.3f}, + S::PROFILE | S::OPTMIZED); + ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy); } TEST(TestGoptInference, AlgoWorkspaceLimit) { diff --git a/src/opr/impl/dnn/dnn.sereg.h b/src/opr/impl/dnn/dnn.sereg.h index b60f87285420fb33d006a9eb41360882cfabcb25..2082f42fd60dbe961520e25b62a3cbd1a6333583 100644 --- a/src/opr/impl/dnn/dnn.sereg.h +++ b/src/opr/impl/dnn/dnn.sereg.h @@ -20,7 +20,6 @@ #include "megbrain/opr/dnn/lrn.h" #include "megbrain/opr/dnn/fake_quant.h" #include "megbrain/opr/dnn/tqt.h" - #include "megbrain/serialization/sereg.h" #include "megdnn/opr_param_defs.h" #include "megdnn/oprs/nn.h" diff --git a/src/opr/impl/search_policy/algo_chooser.cpp b/src/opr/impl/search_policy/algo_chooser.cpp index 1985d8d01ff4363cbf484654323b3826843614e5..22c49155782363a23ca9dbd48c68a7ab8da62770 100644 --- a/src/opr/impl/search_policy/algo_chooser.cpp +++ b/src/opr/impl/search_policy/algo_chooser.cpp @@ -284,8 +284,9 @@ namespace mgb { namespace opr { template -void AlgoChooser::profile(ExeContext& ctx, bool require_reproducible) { - if (ctx.get_profile_result_from_cache(require_reproducible).valid()) +void AlgoChooser::profile(ExeContext& ctx, + ExecutionStrategy select_strategy) { + if (ctx.get_profile_result_from_cache(select_strategy).valid()) return; AlgoChooserProfileCache::Result prof_rst; @@ -305,7 +306,7 @@ void AlgoChooser::profile(ExeContext& ctx, bool require_reproducible) { algo.name.c_str(), str_on_inp_shape.c_str()); ImplExecutionPolicy policy; policy.algo = algo.desc; - ctx.construct_execution_policy(require_reproducible, policy); + ctx.construct_execution_policy(select_strategy, policy); if (ctx.get_workspace_size_bytes(policy) 
>= workspace_limit) continue; @@ -354,7 +355,8 @@ void AlgoChooser::profile(ExeContext& ctx, bool require_reproducible) { template typename AlgoChooser::ImplExecutionPolicy -AlgoChooser::choose_by_profile(ExeContext& ctx, bool require_reproducible, +AlgoChooser::choose_by_profile(ExeContext& ctx, + ExecutionStrategy select_strategy, bool enable_update) { MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile"))) if (ctx.owner_graph()->options().no_profiling_on_shape_change) { @@ -376,11 +378,11 @@ AlgoChooser::choose_by_profile(ExeContext& ctx, bool require_reproducible, to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(), _item.param, ctx.mgb_opr(), ctx.comp_node(), ctx.execution_policy(), ctx.allow_weight_preprocess()); - AlgoChooser<_Opr>::profile(sub_ctx, require_reproducible); + AlgoChooser<_Opr>::profile(sub_ctx, select_strategy); }); } typename AlgoChooser::ImplExecutionPolicy policy; - ctx.construct_execution_policy(require_reproducible, policy); + ctx.construct_execution_policy(select_strategy, policy); return policy; MIDOUT_E } @@ -402,11 +404,9 @@ size_t AlgoChooser::setup_algo(const FixedTensorLayouts& layouts, ImplExecutionPolicy policy; if (auto algo_choose_hook = mgb_opr->algo_chooser()) { policy = algo_choose_hook(mgb_opr); - ctx.construct_execution_policy( - mgb_opr->execution_policy().strategy == - mixin::AlgoChooserHelper::ExecutionPolicy::Strategy:: - HEURISTIC_REPRODUCIBLE, - policy, false); + ctx.construct_execution_policy((ExecutionStrategy::HEURISTIC | + ExecutionStrategy::REPRODUCIBLE), + policy, false); } if (!policy.algo.valid()) { policy = get_policy(ctx); @@ -419,10 +419,9 @@ size_t AlgoChooser::setup_algo(const FixedTensorLayouts& layouts, Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo); mgb_assert(palgo, "Unknown algo description"); ret.append("): algo=" + std::string(palgo->name())); - ret.append(ssprintf(" workspace=%.2fMiB reproducible=%d", + ret.append(ssprintf(" workspace=%.2fMiB 
attirbute=%d", workspace / (1024 * 1024.0), - palgo->contain_attribute( - megdnn::AlgoAttribute::REPRODUCIBLE))); + static_cast(palgo->attribute()))); mgb_log_debug("%s", ret.c_str()); megdnn_opr->execution_policy() = policy; @@ -432,41 +431,39 @@ size_t AlgoChooser::setup_algo(const FixedTensorLayouts& layouts, template typename AlgoChooser::ImplExecutionPolicy AlgoChooser::get_policy( ExeContext& ctx) { - using S = mixin::AlgoChooserHelper::ExecutionPolicy::Strategy; MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE); - switch (ctx.execution_policy().strategy) { - case S::HEURISTIC: - return ctx.choose_by_heuristic(); - case S::HEURISTIC_REPRODUCIBLE: - return ctx.choose_by_heuristic(true); - case S::PROFILE_HEURISTIC: { - ImplExecutionPolicy policy = choose_by_profile(ctx, false, false); - if (!policy.algo.valid()) - policy = ctx.choose_by_heuristic(); - return policy; - } + auto opr_strategy = ctx.execution_policy().strategy; + if ((opr_strategy & ExecutionStrategy::HEURISTIC) && + (opr_strategy & ExecutionStrategy::PROFILE)) { + ImplExecutionPolicy policy = + choose_by_profile(ctx, opr_strategy, false); + if (!policy.algo.valid()) + policy = ctx.choose_by_heuristic(opr_strategy); + return policy; + } else if ((opr_strategy & ExecutionStrategy::HEURISTIC)) { + return ctx.choose_by_heuristic(opr_strategy); + } #if MGB_ENABLE_FASTRUN - case S::PROFILE: - return choose_by_profile(ctx, false); - case S::PROFILE_REPRODUCIBLE: - return choose_by_profile(ctx, true); + else if (opr_strategy & ExecutionStrategy::PROFILE) { + return choose_by_profile(ctx, opr_strategy); + } #endif - default: - mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy"); + else { + mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy"); } } -#define INST(Opr) \ - template AlgoChooser::ImplExecutionPolicy \ - AlgoChooser::get_policy(ExeContext& ctx); \ - template void AlgoChooser::profile( \ - ExeContext& ctx, bool require_reproducible); \ - template AlgoChooser::ImplExecutionPolicy \ - 
AlgoChooser::choose_by_profile( \ - ExeContext& ctx, bool require_reproducible, bool enable_update); \ - template size_t AlgoChooser::setup_algo( \ - const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \ - const MGBOpr* mgb_opr, bool allow_weight_preprocess); \ +#define INST(Opr) \ + template AlgoChooser::ImplExecutionPolicy \ + AlgoChooser::get_policy(ExeContext& ctx); \ + template void AlgoChooser::profile(ExeContext& ctx, \ + ExecutionStrategy); \ + template AlgoChooser::ImplExecutionPolicy \ + AlgoChooser::choose_by_profile( \ + ExeContext& ctx, ExecutionStrategy, bool enable_update); \ + template size_t AlgoChooser::setup_algo( \ + const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \ + const MGBOpr* mgb_opr, bool allow_weight_preprocess); MGB_FOREACH_FASTRUN_OPR(INST) @@ -498,7 +495,7 @@ AlgoChooser::ExeContext::ExeContext( template typename AlgoChooser::ImplAlgo AlgoChooser::ExeContext::get_profile_result_from_cache( - bool require_reproducible) const { + ExecutionStrategy select_strategy) const { MIDOUT_B(Opr, midout_iv(MGB_HASH_STR( "AlgoChooser::ExeContext::get_profile_result_from_cache"))) @@ -522,7 +519,9 @@ AlgoChooser::ExeContext::get_profile_result_from_cache( if (prof.empty()) return {}; for (auto&& i : prof) { - if ((!require_reproducible || i.reproducible)) { + if (!(select_strategy & ExecutionStrategy::REPRODUCIBLE) || + static_cast(i.attribute) & + AlgoAttribute::REPRODUCIBLE) { auto iter = algo_map.find(i.algo); mgb_assert(iter != algo_map.end(), "algorithm %s exists in " @@ -550,7 +549,8 @@ AlgoChooser::ExeContext::get_profile_result_from_cache( template typename AlgoChooser::ImplExecutionPolicy -AlgoChooser::ExeContext::choose_by_heuristic(bool reproducible) const { +AlgoChooser::ExeContext::choose_by_heuristic( + ExecutionStrategy select_strategy) const { if (m_execution_policy.workspace_limit != std::numeric_limits::max()) { @@ -558,6 +558,8 @@ AlgoChooser::ExeContext::choose_by_heuristic(bool reproducible) const { 
"workspace_limit should not be setted if choose algo by " "heuristic"); } + bool reproducible = static_cast(select_strategy & + ExecutionStrategy::REPRODUCIBLE); auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit( owner_graph(), m_cn, m_execution_policy.workspace_limit); ImplExecutionPolicy policy; @@ -579,7 +581,8 @@ AlgoChooser::ExeContext::choose_by_heuristic(bool reproducible) const { to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(), _item.param, m_base_mgb_opr, m_cn, m_execution_policy, m_allow_weight_preprocess); - policy.sub_policy.push_back(sub_ctx.choose_by_heuristic(reproducible)); + policy.sub_policy.push_back( + sub_ctx.choose_by_heuristic(select_strategy)); }); return policy; @@ -588,9 +591,8 @@ AlgoChooser::ExeContext::choose_by_heuristic(bool reproducible) const { template std::vector::ImplAlgo> AlgoChooser::ExeContext::get_all_candidates() const { - auto heu = choose_by_heuristic(); - auto&& ret = - APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts); + auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC); + auto&& ret = APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts); bool found = false; for (size_t i = 0; i < ret.size(); ++i) { if (ret[i].desc == heu.algo) { @@ -611,19 +613,21 @@ AlgoChooser::ExeContext::get_all_candidates() const { template void AlgoChooser::ExeContext::construct_execution_policy( - bool require_reproducible, + ExecutionStrategy select_strategy, typename AlgoChooser::ImplExecutionPolicy& policy, bool retrive_from_cache) const { + bool reproducible = static_cast(select_strategy & + ExecutionStrategy::REPRODUCIBLE); if (!policy.algo.valid()) { if (retrive_from_cache) { policy.algo = - get_profile_result_from_cache(require_reproducible).desc; + get_profile_result_from_cache(select_strategy).desc; } else { auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit( owner_graph(), m_cn, m_execution_policy.workspace_limit); policy.algo = 
APPLY(m_megdnn_opr->get_algorithm_info_heuristic( args..., workspace_limit, - require_reproducible), + reproducible), m_layouts) .desc; } @@ -647,7 +651,7 @@ void AlgoChooser::ExeContext::construct_execution_policy( _item.param, m_base_mgb_opr, m_cn, m_execution_policy, m_allow_weight_preprocess); policy.sub_policy.push_back({}); - sub_ctx.construct_execution_policy(require_reproducible, + sub_ctx.construct_execution_policy(select_strategy, policy.sub_policy.back(), retrive_from_cache); }); @@ -718,8 +722,7 @@ AlgoChooser::ExeContext::profile_single_algo( return None; return AlgoChooserProfileCache::ResultEntry{ palgo->name(), - palgo->contain_attribute( - megdnn::AlgoAttribute::REPRODUCIBLE), + static_cast(palgo->attribute()), rst.val().time, param.workspace}; } @@ -768,10 +771,10 @@ AlgoChooser::ExeContext::construct_fake_preprocess_filter() const { bool allow_weight_preprocess); \ template typename AlgoChooser::ImplExecutionPolicy \ AlgoChooser::ExeContext::choose_by_heuristic( \ - bool reproducible) const; \ + ExecutionStrategy select_strategy) const; \ template typename AlgoChooser::ImplAlgo \ AlgoChooser::ExeContext::get_profile_result_from_cache( \ - bool require_reproducible) const; \ + ExecutionStrategy select_strategy) const; \ template std::vector::ImplAlgo> \ AlgoChooser::ExeContext::get_all_candidates() const; \ template size_t \ @@ -780,7 +783,7 @@ AlgoChooser::ExeContext::construct_fake_preprocess_filter() const { policy) const; \ template void \ AlgoChooser::ExeContext::construct_execution_policy( \ - bool require_reproducible, \ + ExecutionStrategy select_strategy, \ typename AlgoChooser::ImplExecutionPolicy& policy, \ bool retrive_from_cache) const; \ template Maybe \ diff --git a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h index c32dc6d2c2266b0888bdcfcf1fdf5d1a960b28bc..a9af2081373906423ec28d8914873692d1579a82 100644 --- 
a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h +++ b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h @@ -35,6 +35,13 @@ MGB_FOREACH_FASTRUN_OPR(cb) #undef cb namespace mgb { + +//! shorthand aliases for the bit-flag enums megdnn::param::ExecutionPolicy::Strategy +//! and megdnn::AlgoAttribute +using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy; + +using AlgoAttribute = megdnn::AlgoAttribute; + namespace opr { /* =================== AlgoChooser =================== */ @@ -103,7 +110,7 @@ public: const FixedTensorLayouts& layouts() const { return m_layouts; } ImplExecutionPolicy choose_by_heuristic( - bool reproducible = false) const; + ExecutionStrategy select_strategy) const; //! get all candidate algos, and the one choose_by_heuristic() is //! put first @@ -126,19 +133,20 @@ public: const ImplExecutionPolicy& policy, double& timeout) const; //! get all profile algorithm from cache, return invalid if not exists - ImplAlgo get_profile_result_from_cache(bool require_reproducible) const; + ImplAlgo get_profile_result_from_cache( + ExecutionStrategy select_strategy) const; /** * \brief construct execution policy from cache or heuristic. * - * \param require_reproducible select algo which is reproducible + * \param select_strategy select an algo that matches this strategy * \param policy execution policy * \param retrive_from_cache retrive algo from cache if set True, get * from heuristic otherwise. */ - void construct_execution_policy( - bool require_reproducible, ImplExecutionPolicy& policy, - bool retrive_from_cache = true) const; + void construct_execution_policy(ExecutionStrategy select_strategy, + ImplExecutionPolicy& policy, + bool retrive_from_cache = true) const; private: Maybe> construct_fake_preprocess_filter() const; @@ -153,11 +161,11 @@ private: //! 
profile and save to cache - static void profile(ExeContext& ctx, bool require_reproducible); + static void profile(ExeContext& ctx, ExecutionStrategy select_strategy); - static ImplExecutionPolicy choose_by_profile(ExeContext& ctx, - bool require_reproducible, - bool enable_update = true); + static ImplExecutionPolicy choose_by_profile( + ExeContext& ctx, ExecutionStrategy select_strategy, + bool enable_update = true); public: /*! diff --git a/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h b/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h index bea97d1016e63b6c68d24182c468dd4634a067dc..0e94b6387e399f472cdadfa75d5060f4055add7c 100644 --- a/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h +++ b/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h @@ -13,7 +13,6 @@ #pragma once #include "megbrain/graph/operator_node.h" -#include "megbrain/opr/param_defs.h" #include "megdnn/oprs/base.h" #include "megdnn/oprs/nn.h" @@ -73,7 +72,6 @@ protected: }; } // namespace mixin - } // namespace opr } // namespace mgb diff --git a/src/opr/test/blas.cpp b/src/opr/test/blas.cpp index 10ada0c9d6151d622fffb5460587f70584e1f985..e02f2b97ead8cd6139d02b575b0b760c6e6fead1 100644 --- a/src/opr/test/blas.cpp +++ b/src/opr/test/blas.cpp @@ -429,10 +429,11 @@ TEST(TestOprDNN, MatrixMulExePolicy) { auto cn = CompNode::load("cpux"); #if MGB_ENABLE_FASTRUN - for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy: {S:HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif auto graph = ComputingGraph::make(); diff --git a/src/opr/test/dnn/convolution.cpp b/src/opr/test/dnn/convolution.cpp index 8b0b81fa0c6d5acf9a77655edbd7098d11bbb0fa..9e779f2719e97f77346b7306d8f9bba26f9562bf 100644 --- 
a/src/opr/test/dnn/convolution.cpp +++ b/src/opr/test/dnn/convolution.cpp @@ -355,11 +355,13 @@ TEST(TestOprDNN, ConvBiasExePolicy) { auto cn = CompNode::load("cpux"); #if MGB_ENABLE_FASTRUN - for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif - auto graph = ComputingGraph::make(); HostTensorGenerator<> gen; @@ -397,7 +399,8 @@ TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) { auto cn = CompNode::load("cpux"); - for (auto strategy: {S::PROFILE, S::PROFILE_REPRODUCIBLE}) { + for (auto strategy : + SmallVector{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) { auto graph = ComputingGraph::make(); HostTensorGenerator<> gen; @@ -439,10 +442,12 @@ TEST(TestOprDNN, ConvolutionExePolicy) { PersistentCacheHook cache_hook{on_get}; #if MGB_ENABLE_FASTRUN - for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif using Checker = AutoOprChecker<2, 1>; @@ -522,10 +527,11 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) { PersistentCacheHook cache_hook{on_get}; #if MGB_ENABLE_FASTRUN - for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + {S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE), + S(S::PROFILE | S::HEURISTIC)}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto 
strategy: {S:HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) { #endif using Checker = AutoOprChecker<2, 1>; @@ -1183,9 +1189,12 @@ TEST(TestOprDNN, Convolution3DExePolicy) { using S = Policy::Strategy; #if MGB_ENABLE_FASTRUN - for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif using Checker = AutoOprChecker<2, 1>; @@ -1660,10 +1669,12 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) { PersistentCacheHook cache_hook{on_get}; #if MGB_ENABLE_FASTRUN - for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { #else - for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { @@ -1769,10 +1780,12 @@ TEST(TestOprDNN, DeformableConvForward) { Param param; #if MGB_ENABLE_FASTRUN - for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { #else - for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { @@ -1936,10 +1949,12 @@ TEST(TestOprDNN, BatchConvBiasForward) { param.sparse = Param::Sparse::DENSE; #if MGB_ENABLE_FASTRUN - for (auto 
strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, - S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE, + S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) { #else - for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) { + for (auto strategy : + SmallVector{S : HEURISTIC, S::PROFILE | S::HEURISTIC}) { #endif auto make_quantized = [&](SymbolVar x, const DType& dtype) { @@ -2080,7 +2095,8 @@ TEST(TestOprDNN, HeuristicReproducible) { constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1; - for (auto strategy : {S::HEURISTIC, S::HEURISTIC_REPRODUCIBLE}) { + for (auto strategy : + SmallVector{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) { VarNode* bwd_flt; auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { @@ -2126,7 +2142,7 @@ TEST(TestOprDNN, HeuristicReproducible) { megdnn::Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(algo); mgb_assert(palgo, "Unknown algo description"); - if (strategy == S::HEURISTIC_REPRODUCIBLE) { + if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) { EXPECT_TRUE(palgo->contain_attribute( megdnn::AlgoAttribute::REPRODUCIBLE)); } diff --git a/test/src/include/megbrain/test/helper.h b/test/src/include/megbrain/test/helper.h index e122a76112a0f1daaedd512b6c1bc8283fb4270e..92cc29be45bc6022c4a89b98da01aa522eef79a5 100644 --- a/test/src/include/megbrain/test/helper.h +++ b/test/src/include/megbrain/test/helper.h @@ -43,6 +43,7 @@ namespace megdnn { std::ostream &ostr, const DType &dt) { return ostr << dt.name(); } + } // namespace megdnn namespace mgb { diff --git a/tools/param_defs/mgb_opr_param_defs.py b/tools/param_defs/mgb_opr_param_defs.py index c5f2cf2d9455a0ab9990add8f5bf20daacfe1205..d8fd2026e242be0a281a25c833590ec2cb314722 100644 --- a/tools/param_defs/mgb_opr_param_defs.py +++ b/tools/param_defs/mgb_opr_param_defs.py @@ -18,7 +18,7 @@ pdef('PersistentOutputStorage').add_fields( add_const('int32', 'INVALID_AXIS', 
'MAX_NDIM'). add_fields('int32', 'axis', 'INVALID_AXIS')) -(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator'). +(pdef('ExecutionPolicy', version=0, is_legacy=True). add_enum('Strategy', Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'), Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, ' @@ -33,6 +33,20 @@ pdef('PersistentOutputStorage').add_fields( Doc('workspace_limit', 'workspace limit in bytes'), str(2**64-1)+'ull')) +(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator', version=1). + add_bit_combination_enum('Strategy', + Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'), + Doc('PROFILE', + 'run possible algorithms on real device to find the best'), + Doc('REPRODUCIBLE', + 'when selecting an algo by profile or heuristic, require that the ' + 'algo be reproducible'), + Doc('OPTMIZED', + 'profile requires algos to be optimized to achieve fast-profile')). + add_fields('uint64', + Doc('workspace_limit', 'workspace limit in bytes'), + str(2**64-1)+'ull')) + (pdef('AssertEqual'). add_fields('float32', Doc('maxerr', 'max allowed error; error is defined as the minimal '