diff --git a/dnn/include/megdnn/basic_types.h b/dnn/include/megdnn/basic_types.h
index 61f580a5f2f7173d041b6872b0d6990643d53a13..c2902613f6443a684ff15a3ef0657b9528f3038e 100644
--- a/dnn/include/megdnn/basic_types.h
+++ b/dnn/include/megdnn/basic_types.h
@@ -506,10 +506,66 @@ struct DynOutMallocPolicyCall {
     }
 };
 
+//! Integer wrapper for the bits of enum class \p T: result type of the bitwise operators below, implicitly convertible back to \p T.
+template <typename T>
+class EnumClassBit {
+    std::underlying_type_t<T> m_val;
+    //! private: wrap a raw underlying-type value (used by the operators below)
+    constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
+
+public:
+    constexpr EnumClassBit(T v)
+            : m_val(static_cast<std::underlying_type_t<T>>(v)) {}
+    //! implicit conversion back to the enum type
+    constexpr operator T() const { return static_cast<T>(m_val); }
+    //! explicit truth test: true when the stored value is non-zero
+    constexpr explicit operator bool() const { return m_val; }
+    //! DEF_OPR(op): member `*this op rhs`, applied to the stored integers
+#define DEF_OPR(op)                                                     \
+    constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
+        return m_val op rhs.m_val;                                      \
+    }
+
+    DEF_OPR(&)
+    DEF_OPR(|)
+    DEF_OPR (^)
+    //! bitwise complement of the stored value
+    constexpr EnumClassBit operator~() const { return ~m_val; }
+
+#undef DEF_OPR
+};
+
 #endif  // MEGDNN_CC_HOST
 
 }  // namespace megdnn
 
+#define _MEGDNN_DECBO_SINGLE_OPR(cls, op)                                    \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
+        return ::megdnn::EnumClassBit<cls>(x)                                \
+                op ::megdnn::EnumClassBit<cls>(y);                           \
+    }                                                                        \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator op(                \
+            ::megdnn::EnumClassBit<cls> x, cls y) {                          \
+        return x op ::megdnn::EnumClassBit<cls>(y);                          \
+    }
+//! Helpers for MEGDNN_DEF_ENUM_CLASS_BIT_OPR: SINGLE_OPR (above) defines `cls op cls` and `EnumClassBit<cls> op cls`; SINGLE_OPR_ASSIGN (below) defines `cls op= cls`, storing the result back into x.
+#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op)          \
+    inline constexpr cls& operator op##=(cls& x, cls y) { \
+        x = x op ::megdnn::EnumClassBit<cls>(y);          \
+        return x;                                         \
+    }
+//! Expands to free operators &, |, ^, &=, |=, ^= and ~ for enum class \p cls. NOTE(review): EnumClassBit appears to be declared inside the MEGDNN_CC_HOST guard while these macros are not -- confirm they are only expanded where that guard is active.
+#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)                          \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, &)                                \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, |)                                \
+    _MEGDNN_DECBO_SINGLE_OPR(cls, ^)                                \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &)                         \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |)                         \
+    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^)                         \
+    inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
+        return ~::megdnn::EnumClassBit<cls>(x);                     \
+    }
+
 #include "megdnn/internal/visibility_epilogue.h"
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/include/megdnn/oprs/base.h b/dnn/include/megdnn/oprs/base.h
index 1fedcd5d61715845c695ff34d1fb1f55f8c4c65c..d111a0c2adcb320482b63093b3be0d69b42154df 100644
--- a/dnn/include/megdnn/oprs/base.h
+++ b/dnn/include/megdnn/oprs/base.h
@@ -251,6 +251,8 @@ protected:
     Handle::HandleType m_handle_type = Handle::HandleType::NAIVE;
 };
 
+MEGDNN_DEF_ENUM_CLASS_BIT_OPR(Algorithm::Attribute)
+
 //! policy for executing the operator
 struct ExecutionPolicy {
     //! INVALID_ALGO_TYPE algo_type means using heuristic
diff --git a/dnn/scripts/gen_flatbuffers_schema.py b/dnn/scripts/gen_flatbuffers_schema.py
index f66040f4400f91cc1d82fdd24de626620dd41fd0..3c43561b4f80a1902b539a17219764c01036fba6 100755
--- a/dnn/scripts/gen_flatbuffers_schema.py
+++ b/dnn/scripts/gen_flatbuffers_schema.py
@@ -53,9 +53,13 @@ class FlatBuffersWriter(IndentWriterBase):
             e = self._enums[(p, e)]
             self._write_doc(e.name)
             self._write("enum %s%s : uint {", p, e.name, indent=1)
-            for member in e.members:
+            for idx, member in enumerate(e.members):
                 self._write_doc(member)
-                self._write("%s,", scramble_enum_member_name(str(member)))
+                if e.combined:
+                    self._write("%s=%d,", scramble_enum_member_name(str(member)),
+                            1<<idx)
+                else:
+                    self._write("%s,", scramble_enum_member_name(str(member)))
             self._write("}\n", indent=-1)
 
     def _write_doc(self, doc):
diff --git a/dnn/scripts/gen_param_defs.py b/dnn/scripts/gen_param_defs.py
index 9237f50136c9dae24c76ac6d1dfc89aaa4e9aa5c..47f58af04ca5c1512cf81abd2a6ffb106d63850f 100755
--- a/dnn/scripts/gen_param_defs.py
+++ b/dnn/scripts/gen_param_defs.py
@@ -80,13 +80,13 @@ class member_defs:
         :attr member_alias: list of (member, alias) pairs
         """
         __slots__ = ['name', 'name_field', 'members', 'default',
-                     'member_alias']
+                     'member_alias', 'combined']
 
         all_enums = {}
         """(param_name, name) => enum"""
 
         def __init__(self, param_name, name, name_field, members, default,
-                     member_alias):
+                member_alias, combined = False):
             name = member_defs.Doc.make(name)
             assert name.id[0].isupper()
             members = tuple(map(member_defs.Doc.make, members))
@@ -97,6 +97,7 @@ class member_defs:
                 default = name_field.index(default)
             assert isinstance(default, int)
             self.name = name
+            self.combined = combined
             self.name_field = self.get_name_field(name.id, name_field)
             self.members = members
             self.default = default
@@ -197,6 +198,12 @@ class ParamDef:
             self.name.id, name, name_field, members, default, member_alias))
         return self
 
+    def add_bit_combination_enum(self, name, *members, default=0,
+                 name_field=None, member_alias=[]):
+        self.members.append(member_defs.Enum(
+            self.name.id, name, name_field, members, default, member_alias, True))
+        return self
+
     def add_enum_alias(self, name, src_class, src_name=None, name_field=None,
                        default=None):
         self.members.append(member_defs.EnumAlias(
@@ -463,8 +470,12 @@ class SerializedDType(_ParamDefBase):
         for idx, emem in enumerate(e.members):
             self._write('%s = "%s"', emem, emem)
             self._write_doc(emem)
-            self._enum_member2num.append('id({}.{}):{}'.format(
-                qualname, emem, idx))
+            if e.combined:
+                self._enum_member2num.append('id({}.{}):{}'.format(
+                    qualname, emem, 1<<idx))
+            else:
+                self._enum_member2num.append('id({}.{}):{}'.format(
+                    qualname, emem, idx))
 
         for emem, emem_alis in e.member_alias:
             self._write('%s = %s', emem_alis, emem)
@@ -622,6 +633,8 @@ class CPPWriter(IndentWriterBase):
         for idx, i in enumerate(e.members):
             self._write_doc(i)
             v = '{} = {}'.format(i, idx)
+            if e.combined:
+                v = '{} = 1 << {}'.format(i, idx)
             if i is not e.members[-1] or e.member_alias:
                 v += ','
             self._write(v)
@@ -672,7 +685,6 @@ class CPPEnumValueWriter(CPPWriter):
             self._write('static const uint32_t %s = %s;', alias, mem)
         self._write('};', indent=-1)
 
-
     def _on_member_enum_alias(self, e):
         s = e.src_enum
         self._write('typedef %s::%s %s;', e.src_class, e.src_name, e.name)
diff --git a/dnn/scripts/gen_tablegen.py b/dnn/scripts/gen_tablegen.py
index f1c174c3ab7d496bfebf883783b44c634893c70d..8322058b9fc9f001752484595727d91d91be23e2 100755
--- a/dnn/scripts/gen_tablegen.py
+++ b/dnn/scripts/gen_tablegen.py
@@ -91,12 +91,17 @@ class ConverterWriter(IndentWriterBase):
         def format(v):
             return '\"{}\"'.format(str(v))
         enum_def += ','.join(format(i) for i in e.members)
-        enum_def += "]"
+
+        if e.combined:
+            enum_def += "], 1"
+        else:
+            enum_def += "], 0"
+
         if ENUM_TO_STRING_SPECIAL_RULES.count((p.name, e.name)):
             enum_def += ", 1" # whether generate ToStringTrait
         enum_def += ">"
-        self._write("def {} : {};".format(td_class, enum_def))
 
+        self._write("def {} : {};".format(td_class, enum_def))
         if self._skip_current_param:
             return
 
diff --git a/dnn/src/common/algo_base.h b/dnn/src/common/algo_base.h
index ff9b0d58ae3a63629f480d0e16f2cb357dd53c30..e0f3202bf1d65b63539cebb274619d5f39e7fe29 100644
--- a/dnn/src/common/algo_base.h
+++ b/dnn/src/common/algo_base.h
@@ -21,8 +21,6 @@
 
 namespace megdnn {
 
-MEGDNN_DEF_ENUM_CLASS_BIT_OPR(AlgoAttribute)
-
 #define MEGDNN_DECL_ALGO_TYPE(_type)                              \
     uint32_t type() const override {                              \
         return static_cast<std::underlying_type<AlgoType>::type>( \
diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h
index 3af0ccdf87f5a54729555b72030cb4e936d8e5e2..a0309aedbe481d11efc4c7adc899c6e89ee0b03b 100644
--- a/dnn/src/common/utils.h
+++ b/dnn/src/common/utils.h
@@ -692,61 +692,6 @@ inline void* get_origin_ptr(const TensorND* tensor, void* ptr) {
                               tensor->layout.span().low_byte);
 }
 
-template <typename T>
-class EnumClassBit {
-    std::underlying_type_t<T> m_val;
-
-    constexpr EnumClassBit(std::underlying_type_t<T> v) : m_val(v) {}
-
-public:
-    constexpr EnumClassBit(T v)
-            : m_val(static_cast<std::underlying_type_t<T>>(v)) {}
-
-    constexpr operator T() const { return static_cast<T>(m_val); }
-
-    constexpr explicit operator bool() const { return m_val; }
-
-#define DEF_OPR(op)                                                     \
-    constexpr EnumClassBit operator op(const EnumClassBit& rhs) const { \
-        return m_val op rhs.m_val;                                      \
-    }
-
-    DEF_OPR(&)
-    DEF_OPR(|)
-    DEF_OPR (^)
-
-    constexpr EnumClassBit operator~() const { return ~m_val; }
-
-#undef DEF_OPR
-};
-
-#define _MEGDNN_DECBO_SINGLE_OPR(cls, op)                                    \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator op(cls x, cls y) { \
-        return ::megdnn::EnumClassBit<cls>(x)                                \
-                op ::megdnn::EnumClassBit<cls>(y);                           \
-    }                                                                        \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator op(                \
-            ::megdnn::EnumClassBit<cls> x, cls y) {                          \
-        return x op ::megdnn::EnumClassBit<cls>(y);                          \
-    }
-
-#define _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, op)          \
-    inline constexpr cls& operator op##=(cls& x, cls y) { \
-        x = x op ::megdnn::EnumClassBit<cls>(y);          \
-        return x;                                         \
-    }
-
-#define MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)                          \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, &)                                \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, |)                                \
-    _MEGDNN_DECBO_SINGLE_OPR(cls, ^)                                \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, &)                         \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, |)                         \
-    _MEGDNN_DECBO_SINGLE_OPR_ASSIGN(cls, ^)                         \
-    inline constexpr ::megdnn::EnumClassBit<cls> operator~(cls x) { \
-        return ~::megdnn::EnumClassBit<cls>(x);                     \
-    }
-
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.h b/dnn/src/cuda/convolution3d/backward_filter/algo.h
index 7e9f84e96c9f595fccb959f0abff67d246da7401..844eb815047cb4dfddf631ce4a7857861ba1a627 100644
--- a/dnn/src/cuda/convolution3d/backward_filter/algo.h
+++ b/dnn/src/cuda/convolution3d/backward_filter/algo.h
@@ -218,4 +218,3 @@ public:
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
diff --git a/imperative/python/megengine/functional/debug_param.py b/imperative/python/megengine/functional/debug_param.py
index a257938d1a60b5d4d65ab9e9757b511de27b3999..08bace859c4e3377ad7c6e17645f9de018984f65 100644
--- a/imperative/python/megengine/functional/debug_param.py
+++ b/imperative/python/megengine/functional/debug_param.py
@@ -8,9 +8,12 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import os
 
+from ..core.ops import builtin
 from ..logger import get_logger
 from ..utils.deprecation import deprecated
 
+Strategy = builtin.ops.Convolution.Strategy
+
 _execution_strategy = os.getenv("MEGENGINE_EXECUTION_STRATEGY", "HEURISTIC")
 
 if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
@@ -19,7 +22,7 @@ if os.getenv("MEGENGINE_CONV_EXECUTION_STRATEGY") != None:
     )
 
 
-def get_execution_strategy() -> str:
+def get_execution_strategy() -> Strategy:
     """
     Returns the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
 
@@ -28,12 +31,22 @@ def get_execution_strategy() -> str:
     return _execution_strategy
 
 
-def set_execution_strategy(option: str):
+def set_execution_strategy(option):
     """
     Sets the execution strategy of :class:`~.Conv2d` and :func:'~.matmul'
 
-    :param option: Decides how :class:`~.Conv2d` and :func:'~.matmul' algorithms are chosen.
-        Available values:
+    :param option: Decides how :class:`~.Conv2d` and :func:`~.matmul` algorithms are chosen.
+        Available values of type Strategy:
+        * HEURISTIC uses heuristics to choose the fastest algorithm.
+        * PROFILE runs possible algorithms on the real device to find the best one.
+        * REPRODUCIBLE uses only algorithms that are reproducible.
+        * OPTMIZED uses only algorithms that are optimized.
+
+        The default strategy is HEURISTIC. These options can be combined to
+        form a combination option, e.g. PROFILE | REPRODUCIBLE
+        selects the fastest algorithm in the profiling result that is also reproducible.
+
+        Available string values:
 
         * 'HEURISTIC' uses heuristic to choose the fastest algorithm.
         * 'PROFILE' runs possible algorithms on real device to find the best one.
@@ -45,18 +58,29 @@ def set_execution_strategy(option: str):
 
         It can also be set through the environment variable 'MEGENGINE_EXECUTION_STRATEGY'.
     """
-    valid_option = (
-        "HEURISTIC",
-        "PROFILE",
-        "PROFILE_HEURISTIC",
-        "PROFILE_REPRODUCIBLE",
-        "HEURISTIC_REPRODUCIBLE",
-    )
-    if not option in valid_option:
-        raise ValueError("Valid option can only be one of {}".format(valid_option))
+    valid_string_option = {
+        "REPRODUCIBLE": Strategy.REPRODUCIBLE,
+        "HEURISTIC": Strategy.HEURISTIC,
+        "PROFILE": Strategy.PROFILE,
+    }
 
     global _execution_strategy  # pylint: disable=global-statement
-    _execution_strategy = option
+    if isinstance(option, Strategy):
+        _execution_strategy = option
+        return
+
+    assert isinstance(option, str)
+
+    strategy_tmp = Strategy(0)
+    for opt in option.split("_"):
+        if not opt in valid_string_option:
+            raise ValueError(
+                "Valid option can only be one of {}, or combine them with '_'.".format(
+                    valid_string_option.keys()
+                )
+            )
+        strategy_tmp = strategy_tmp | valid_string_option[opt]
+    _execution_strategy = strategy_tmp
 
 
 @deprecated(version="1.3", reason="use get_execution_strategy() instead")
diff --git a/imperative/python/test/integration/test_correctness_mnistnet.py b/imperative/python/test/integration/test_correctness_mnistnet.py
index a88075a23870a1c9fe478d31eecd8dc1a933d722..ce33b54161b216322d06f656b82f0bd2e8a48e07 100644
--- a/imperative/python/test/integration/test_correctness_mnistnet.py
+++ b/imperative/python/test/integration/test_correctness_mnistnet.py
@@ -19,6 +19,7 @@ import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import jit
 from megengine.core._trace_option import set_symbolic_shape
+from megengine.core.ops import builtin
 from megengine.core.tensor.utils import make_shape_tuple
 from megengine.functional.debug_param import set_execution_strategy
 from megengine.jit import SublinearMemoryConfig
@@ -33,6 +34,8 @@ from megengine.module import (
 from megengine.optimizer import SGD
 from megengine.tensor import Tensor
 
+Strategy = builtin.ops.Convolution.Strategy
+
 
 def get_gpu_name():
     try:
@@ -242,7 +245,7 @@ def test_correctness():
     else:
         model_name = "mnist_model_with_test_cpu.mge"
     model_path = os.path.join(os.path.dirname(__file__), model_name)
-    set_execution_strategy("HEURISTIC_REPRODUCIBLE")
+    set_execution_strategy(Strategy.HEURISTIC | Strategy.REPRODUCIBLE)
 
     run_train(model_path, False, False, max_err=1e-5)
     run_train(model_path, True, False, max_err=1e-5)
diff --git a/imperative/tablegen/autogen.cpp b/imperative/tablegen/autogen.cpp
index 5e2b137a1c7050e0487783a49d08b4bcccd4b260..1e00f8f3d7901610d5519065a59f08897c3d12da 100644
--- a/imperative/tablegen/autogen.cpp
+++ b/imperative/tablegen/autogen.cpp
@@ -337,6 +337,20 @@ static void gen_op_def_pybind11_single(raw_ostream &os, MgbOp& op, EnumContext&
                         className, attr->getEnumName(), i
                     ));
                 }
+                if (attr->getEnumCombinedFlag()) {
+                    //! define operator |
+                    os << formatv(
+                            "\n    .def(\"__or__\", []({0}::{1} s0, {0}::{1} s1) {{ "
+                            "\n         return static_cast<{0}::{1}>(uint32_t(s0) | uint32_t(s1));"
+                            "\n      })",
+                            className, attr->getEnumName());
+                    //! define operator &
+                    os << formatv(
+                            "\n    .def(\"__and__\", []({0}::{1} s0, {0}::{1} s1) {{"
+                            "\n         return static_cast<{0}::{1}>(uint32_t(s0) & uint32_t(s1));"
+                            "\n    })",
+                            className, attr->getEnumName());
+                }
                 os << formatv(
                     "\n    .def(py::init([](const std::string& in) {"
                     "\n        auto&& str = normalize_enum(in);"
diff --git a/imperative/tablegen/helper.h b/imperative/tablegen/helper.h
index c5f084e4aa1426dec60df91d7dbbe815c6a9e342..c0fa56fb07fe77f8cee995349343868eb3a7e0ff 100644
--- a/imperative/tablegen/helper.h
+++ b/imperative/tablegen/helper.h
@@ -77,6 +77,9 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase {
     bool supportToString() const {
         return getBaseRecord()->getValueAsBit("supportToString");
     }
+    bool getEnumCombinedFlag() const {  // value of the "enumCombined" bit on the base record (set for bit-combination enums)
+        return getBaseRecord()->getValueAsBit("enumCombined");
+    }
 };
 
 struct MgbHashableAttrMixin : public MgbAttrWrapperBase {
diff --git a/sdk/load-and-run/src/mgblar.cpp b/sdk/load-and-run/src/mgblar.cpp
index fa131a7e55a7f7609d371b019bb51c1032770634..f25a0e37497e8c6a728210abd84b7e4ed15c0a69 100644
--- a/sdk/load-and-run/src/mgblar.cpp
+++ b/sdk/load-and-run/src/mgblar.cpp
@@ -142,8 +142,16 @@ R"__usage__(
 #if MGB_ENABLE_FASTRUN
 R"__usage__(
   --fast-run
-    Enable fast-run mode. Operators with multiple algorithms would be profiled
-    on the real device with actual input shapes.
+    This param will be deperated later, please replace with param --full-profile.
+ --full-profile
+    Enable full-profile mode. Operators with multiple algorithms would be profiled
+    on the real device with actual input shapes, all algorithms will be profiled
+    include naive algorithms.
+    See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
+ --fast-profile
+    Enable fast-profile mode. Operators with multiple algorithms would be profiled
+    on the real device with actual input shapes, this mode will only profile the
+    well optimized algorithms to get the profile result fast.
     See `mgb::gopt::enable_opr_algo_profiling_inplace` for more details.
 )__usage__"
 #endif
@@ -511,7 +519,8 @@ struct Args {
     bool disable_assert_throw = false;
     bool share_param_mem = false;
 #if MGB_ENABLE_FASTRUN
-    bool use_fast_run = false;
+    bool use_full_profile = false;
+    bool use_fast_profile = false;
 #endif
     bool reproducible = false;
     std::string fast_run_cache_path;
@@ -695,18 +704,20 @@ void run_test_st(Args &env) {
     using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
     S strategy = S::HEURISTIC;
 #if MGB_ENABLE_FASTRUN
-    if (env.use_fast_run) {
+    if (env.use_full_profile) {
         if (env.reproducible) {
-            strategy = S::PROFILE_REPRODUCIBLE;
+            strategy = S::PROFILE | S::REPRODUCIBLE;
         } else {
             strategy = S::PROFILE;
         }
+    } else if (env.use_fast_profile) {
+        strategy = S::PROFILE | S::OPTMIZED;
     } else if (env.reproducible) {
-        strategy = S::HEURISTIC_REPRODUCIBLE;
+        strategy = S::HEURISTIC | S::REPRODUCIBLE;
     }
 #else
     if (env.reproducible) {
-        strategy = S::HEURISTIC_REPRODUCIBLE;
+        strategy = S::HEURISTIC | S::REPRODUCIBLE;
     }
 #endif
     mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
@@ -729,11 +740,12 @@ void run_test_st(Args &env) {
                     std::make_shared<InFilePersistentCache>(buf.get(), flen));
 #if MGB_ENABLE_FASTRUN
         } else {
-            mgb_assert(env.use_fast_run, "fast-run should be enabled");
+            mgb_assert(env.use_full_profile || env.use_fast_profile,
+                       "fast-run or fast-profile should be enabled");
             PersistentCache::set_impl(
                     std::make_shared<InFilePersistentCache>());
         }
-        if (!env.use_fast_run)
+        if (!env.use_full_profile && !env.use_fast_profile)
 #endif
             mgb::gopt::enable_opr_use_profiling_cache_inplace(vars);
     }
@@ -1314,7 +1326,18 @@ Args Args::from_argv(int argc, char **argv) {
         }
 #if MGB_ENABLE_FASTRUN
         if (!strcmp(argv[i], "--fast-run")) {
-            ret.use_fast_run = true;
+            mgb_log_warn(
+                    "--fast-run param will be deperated later, please replace "
+                    "with --full-profile or --fast-profile.");
+            ret.use_full_profile = true;
+            continue;
+        }
+        if (!strcmp(argv[i], "--full-profile")) {
+            ret.use_full_profile = true;
+            continue;
+        }
+        if (!strcmp(argv[i], "--fast-profile")) {
+            ret.use_fast_profile = true;
             continue;
         }
 #endif
diff --git a/src/core/impl/utils/persistent_cache.cpp b/src/core/impl/utils/persistent_cache.cpp
index 8dbbd3c180f8f24569edaa522dd00501e051ae50..19cbf4c1f6f2aeb57ee80ee21d967e5ac08ccf7f 100644
--- a/src/core/impl/utils/persistent_cache.cpp
+++ b/src/core/impl/utils/persistent_cache.cpp
@@ -188,7 +188,7 @@ AlgoChooserProfileCache::get(const Key &key) {
         auto entry_len = read_uint32();
         mgb_assert(buf + entry_len <= buf_end);
         auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT,
-                         &i.reproducible, &i.time, &i.workspace);
+                         &i.attribute, &i.time, &i.workspace);
         mgb_assert(nr == 3);
         buf += entry_len;
     }
@@ -210,10 +210,10 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
         auto &&cur = result[i];
 
         if (prev.workspace <= cur.workspace &&
-                prev.reproducible == cur.reproducible) {
+            prev.attribute == cur.attribute) {
             result.erase(result.begin() + i);
         } else {
-            ++ i;
+            ++i;
         }
     }
 
@@ -235,8 +235,8 @@ void AlgoChooserProfileCache::put(const Key &key, Result &result) {
         write_uint32(0);
         pos = val.size();
         val.resize(pos + SPR_SIZE);
-        uint32_t nr = snprintf(&val[pos], SPR_SIZE,
-                ENTRY_FMT, i.reproducible, i.time, i.workspace);
+        uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute,
+                               i.time, i.workspace);
         //! for memory boundary failed, snprintf ret do not contain \0
         nr += 1;
         mgb_assert(nr < SPR_SIZE);
diff --git a/src/core/include/megbrain/common.h b/src/core/include/megbrain/common.h
index c90b461ff6e2bf0c7e1ba092df00f1d91e869935..085ff414481db259a15034f02a69af4120810e04 100644
--- a/src/core/include/megbrain/common.h
+++ b/src/core/include/megbrain/common.h
@@ -12,6 +12,8 @@
 #pragma once
 
 #include "megbrain_build_config.h"
+#include "megbrain/opr/param_defs.h"
+#include "megdnn/basic_types.h"
 
 #include <memory>
 #include <string>
@@ -242,6 +244,16 @@ inline constexpr std::size_t operator"" _z(unsigned long long n) {
     return n;
 }
 #endif
+
+#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
+    MEGDNN_DEF_ENUM_CLASS_BIT_OPR(cls)
+
 }   // namespace mgb
 
+namespace megdnn {
+namespace param {
+MGB_DEF_ENUM_CLASS_BIT_OPR(ExecutionPolicy::Strategy)
+}
+}  // namespace megdnn
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h
index 2ff854495db00056a2ce01160f4136f575660595..265f8e67746903dba272df151a57d2da5c338ec4 100644
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -12,7 +12,6 @@
 #pragma once
 
 #include "megbrain/utils/hash.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/metahelper.h"
 #include "megbrain/utils/thin/hash_table.h"
 #include "megbrain/utils/thread.h"
diff --git a/src/core/include/megbrain/graph/operator_node.h b/src/core/include/megbrain/graph/operator_node.h
index a8ea098051d9762ad774b2dffa51974b1e36c539..27c597416dd6270e28b4577c64fe1635e761d264 100644
--- a/src/core/include/megbrain/graph/operator_node.h
+++ b/src/core/include/megbrain/graph/operator_node.h
@@ -16,7 +16,6 @@
 #include "megbrain/graph/symbol_var.h"
 
 #include "megbrain/utils/hashable.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/thin/hash_table.h"
 #include "megbrain/utils/small_vector.h"
 
diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h
index e10319591fe0c8d3cb5463f3636fd208aef84c38..0b61902343173e8b25252cb2950a80394562e0d4 100644
--- a/src/core/include/megbrain/graph/var_node.h
+++ b/src/core/include/megbrain/graph/var_node.h
@@ -12,7 +12,6 @@
 #pragma once
 
 #include "megbrain/graph/bases.h"
-#include "megbrain/utils/enum_class_bit.h"
 #include "megbrain/utils/comp_node_sync_manager.h"
 #include "megbrain/utils/small_vector.h"
 #include "megbrain/utils/mempool.h"
diff --git a/src/core/include/megbrain/ir/base.td b/src/core/include/megbrain/ir/base.td
index d1f35ebc61ca85721f803302314b2c65fec8080b..ee6b75fc66e4feeec44986ee2386c684c894d7dc 100644
--- a/src/core/include/megbrain/ir/base.td
+++ b/src/core/include/megbrain/ir/base.td
@@ -33,10 +33,11 @@ class MgbHashableAttrMixin {
   string reprFunction = "std::to_string($0)";
 }
 
-class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit toString> {
+class MgbEnumAttrMixin<string namespace, string name, list<string> members, bit combined, bit toString> {
   string parentNamespace = namespace;
   string enumName = name;
   list<string> enumMembers = members;
+  bit enumCombined = combined;
   bit supportToString = toString;
 }
 
@@ -166,8 +167,8 @@ class MgbTupleAttr<list<MgbAttrWrapper> args>:
 }
 
 // -- enum types
-class MgbEnumAttr<string namespace, string enumName, list<string> members, bit toString=0>:
-    HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, toString> {
+class MgbEnumAttr<string namespace, string enumName, list<string> members, bit combined, bit toString=0>:
+    HashableAttr<namespace # "::" # enumName>, MgbEnumAttrMixin<namespace, enumName, members, combined, toString> {
   let storageType = "::mlir::IntegerAttr";
   let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
   let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0))";
@@ -176,7 +177,7 @@ class MgbEnumAttr<string namespace, string enumName, list<string> members, bit t
 }
 
 class MgbEnumAliasAttr<string namespace, string enumName, MgbEnumAttr base>:
-    MgbEnumAttr<namespace, enumName, base.enumMembers>, MgbAliasAttrMixin<base>;
+    MgbEnumAttr<namespace, enumName, base.enumMembers, base.enumCombined>, MgbAliasAttrMixin<base>;  // alias forwards the members and the bit-combination flag of the aliased enum
 
 // -- other types
 def MgbDTypeAttr: HashableAttr<"::megdnn::DType"> {
diff --git a/src/core/include/megbrain/utils/enum_class_bit.h b/src/core/include/megbrain/utils/enum_class_bit.h
deleted file mode 100644
index 35e50ccfa2edfd2894d15859bed56037a136a2e7..0000000000000000000000000000000000000000
--- a/src/core/include/megbrain/utils/enum_class_bit.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * \file src/core/include/megbrain/utils/enum_class_bit.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
-#pragma once
-
-#include <type_traits>
-
-namespace mgb {
-    template<typename T>
-    class EnumClassBit {
-        std::underlying_type_t<T> m_val;
-
-        constexpr EnumClassBit(std::underlying_type_t<T> v):
-            m_val(v)
-        {
-        }
-
-        public:
-            constexpr EnumClassBit(T v):
-                m_val(static_cast<std::underlying_type_t<T>>(v))
-            {
-            }
-
-            constexpr operator T() const {
-                return static_cast<T>(m_val);
-            }
-
-            constexpr explicit operator bool() const {
-                return m_val;
-            }
-
-#define DEF_OPR(op) \
-            constexpr EnumClassBit operator op (\
-                    const EnumClassBit &rhs) const { \
-                return m_val op rhs.m_val; \
-            }
-
-            DEF_OPR(&)
-            DEF_OPR(|)
-            DEF_OPR(^)
-
-            constexpr EnumClassBit operator ~() const {
-                return ~m_val;
-            }
-
-
-#undef DEF_OPR
-    };
-
-}
-
-#define _MGB_DECBO_SINGLE_OPR(cls, op) \
-     inline constexpr ::mgb::EnumClassBit<cls> operator op (cls x, cls y) { \
-         return ::mgb::EnumClassBit<cls>(x) op ::mgb::EnumClassBit<cls>(y); \
-     } \
-     inline constexpr ::mgb::EnumClassBit<cls> operator op ( \
-             ::mgb::EnumClassBit<cls> x, cls y) { \
-         return x op ::mgb::EnumClassBit<cls>(y); \
-     }
-
-#define _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, op) \
-     inline constexpr cls& operator op##= (cls& x, cls y) { \
-         x = x op ::mgb::EnumClassBit<cls>(y); \
-         return x; \
-     }
-
-#define MGB_DEF_ENUM_CLASS_BIT_OPR(cls) \
-    _MGB_DECBO_SINGLE_OPR(cls, &) \
-    _MGB_DECBO_SINGLE_OPR(cls, |) \
-    _MGB_DECBO_SINGLE_OPR(cls, ^) \
-    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, &) \
-    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, |) \
-    _MGB_DECBO_SINGLE_OPR_ASSIGN(cls, ^) \
-    inline constexpr ::mgb::EnumClassBit<cls> operator ~ (cls x) { \
-        return ~::mgb::EnumClassBit<cls>(x); \
-    } \
-
-
-
-// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
-
diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h
index 8c2777a9bf273936c3c02446b49aefb26507221d..ef84fa7022f135041873cccf653b7a40319ab6bc 100644
--- a/src/core/include/megbrain/utils/persistent_cache.h
+++ b/src/core/include/megbrain/utils/persistent_cache.h
@@ -100,8 +100,7 @@ namespace mgb {
 
             struct ResultEntry {
                 std::string algo;   //! identifier of the algorithm
-                //! sscanf will up bool as int
-                int reproducible;  //! whether algorithm is reproducible
+                uint32_t attribute;  //! algo attribute, e.g. reproducible
                 double time;        //! execution time in seconds
                 size_t workspace;   //! workspace in bytes
             };
diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp
index 2e4ddcf8cfad03c18fc677416cc72b4732b06d49..3b4a86087f5b525a74afbb28330f231fb97784b6 100644
--- a/src/gopt/impl/inference.cpp
+++ b/src/gopt/impl/inference.cpp
@@ -54,7 +54,6 @@ using namespace gopt;
 
 namespace {
 
-
 template <typename SharedDeviceTensor, typename MultipleDeviceTensorHolder>
 void param_merge(OptState& opt_state) {
     auto rewriter = opt_state.graph().make_rewriter();
@@ -102,7 +101,7 @@ void param_merge(OptState& opt_state) {
     rewriter.apply_inplace();
 }
 
-}
+}  // namespace
 
 /* ================ global functions ================ */
 
@@ -190,12 +189,10 @@ void gopt::enable_opr_algo_profiling_inplace(
 
 void gopt::enable_opr_use_profiling_cache_inplace(
         const VarNodeArrayView& dest_vars) {
-    modify_opr_algo_strategy_inplace(
-            dest_vars, opr::mixin::AlgoChooserHelper::ExecutionPolicy::
-                               Strategy::PROFILE_HEURISTIC);
+    using S = megdnn::param::ExecutionPolicy::Strategy;
+    modify_opr_algo_strategy_inplace(dest_vars, S::PROFILE | S::HEURISTIC);
 }
 
-
 void gopt::set_opr_algo_workspace_limit_inplace(
         const VarNodeArrayView& dest_vars, size_t workspace_limit) {
     static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>
diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp
index 4ef7808e72fdefb5498d9937ed8dbc1e5446b9c9..36d498307009bb4682a5d92ccf726ca35c2f3c99 100644
--- a/src/gopt/test/inference.cpp
+++ b/src/gopt/test/inference.cpp
@@ -1693,7 +1693,22 @@ TEST(TestGoptInference, ProfileCache) {
     using S = opr::Convolution::ExecutionPolicy::Strategy;
     ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
     gopt::enable_opr_use_profiling_cache_inplace({z + 2.3f});
-    ASSERT_EQ(S::PROFILE_HEURISTIC, conv.execution_policy().strategy);
+    ASSERT_EQ(S::PROFILE | S::HEURISTIC, conv.execution_policy().strategy);
+}
+
+TEST(TestGoptInference, FastProfileCache) {
+    HostTensorGenerator<> gen;
+    auto graph = ComputingGraph::make();
+    auto host_x = gen({4, 3, 8, 9}), host_y = gen({2, 3, 3, 3});
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
+         y = opr::Host2DeviceCopy::make(*graph, host_y),
+         z = opr::Convolution::make(x, y);
+    auto&& conv = z.node()->owner_opr()->cast_final_safe<opr::Convolution>();
+    using S = opr::Convolution::ExecutionPolicy::Strategy;
+    ASSERT_EQ(S::HEURISTIC, conv.execution_policy_transient().strategy);
+    gopt::modify_opr_algo_strategy_inplace({z + 2.3f},
+                                           S::PROFILE | S::OPTMIZED);
+    ASSERT_EQ(S::PROFILE | S::OPTMIZED, conv.execution_policy().strategy);
 }
 
 TEST(TestGoptInference, AlgoWorkspaceLimit) {
diff --git a/src/opr/impl/dnn/dnn.sereg.h b/src/opr/impl/dnn/dnn.sereg.h
index b60f87285420fb33d006a9eb41360882cfabcb25..2082f42fd60dbe961520e25b62a3cbd1a6333583 100644
--- a/src/opr/impl/dnn/dnn.sereg.h
+++ b/src/opr/impl/dnn/dnn.sereg.h
@@ -20,7 +20,6 @@
 #include "megbrain/opr/dnn/lrn.h"
 #include "megbrain/opr/dnn/fake_quant.h"
 #include "megbrain/opr/dnn/tqt.h"
-
 #include "megbrain/serialization/sereg.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs/nn.h"
diff --git a/src/opr/impl/search_policy/algo_chooser.cpp b/src/opr/impl/search_policy/algo_chooser.cpp
index 1985d8d01ff4363cbf484654323b3826843614e5..22c49155782363a23ca9dbd48c68a7ab8da62770 100644
--- a/src/opr/impl/search_policy/algo_chooser.cpp
+++ b/src/opr/impl/search_policy/algo_chooser.cpp
@@ -284,8 +284,9 @@ namespace mgb {
 namespace opr {
 
 template <typename Opr>
-void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
-    if (ctx.get_profile_result_from_cache(require_reproducible).valid())
+void AlgoChooser<Opr>::profile(ExeContext& ctx,
+                               ExecutionStrategy select_strategy) {
+    if (ctx.get_profile_result_from_cache(select_strategy).valid())
         return;
     AlgoChooserProfileCache::Result prof_rst;
 
@@ -305,7 +306,7 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
                                    algo.name.c_str(), str_on_inp_shape.c_str());
         ImplExecutionPolicy policy;
         policy.algo = algo.desc;
-        ctx.construct_execution_policy(require_reproducible, policy);
+        ctx.construct_execution_policy(select_strategy, policy);
         if (ctx.get_workspace_size_bytes(policy) >= workspace_limit)
             continue;
 
@@ -354,7 +355,8 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx, bool require_reproducible) {
 
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
+AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx,
+                                    ExecutionStrategy select_strategy,
                                     bool enable_update) {
     MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile")))
     if (ctx.owner_graph()->options().no_profiling_on_shape_change) {
@@ -376,11 +378,11 @@ AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx, bool require_reproducible,
                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
                     _item.param, ctx.mgb_opr(), ctx.comp_node(),
                     ctx.execution_policy(), ctx.allow_weight_preprocess());
-            AlgoChooser<_Opr>::profile(sub_ctx, require_reproducible);
+            AlgoChooser<_Opr>::profile(sub_ctx, select_strategy);
         });
     }
     typename AlgoChooser<Opr>::ImplExecutionPolicy policy;
-    ctx.construct_execution_policy(require_reproducible, policy);
+    ctx.construct_execution_policy(select_strategy, policy);
     return policy;
     MIDOUT_E
 }
@@ -402,11 +404,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
     ImplExecutionPolicy policy;
     if (auto algo_choose_hook = mgb_opr->algo_chooser()) {
         policy = algo_choose_hook(mgb_opr);
-        ctx.construct_execution_policy(
-                mgb_opr->execution_policy().strategy ==
-                        mixin::AlgoChooserHelper::ExecutionPolicy::Strategy::
-                                HEURISTIC_REPRODUCIBLE,
-                policy, false);
+        ctx.construct_execution_policy((ExecutionStrategy::HEURISTIC |
+                                        ExecutionStrategy::REPRODUCIBLE),
+                                       policy, false);
     }
     if (!policy.algo.valid()) {
         policy = get_policy(ctx);
@@ -419,10 +419,9 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
     Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo);
     mgb_assert(palgo, "Unknown algo description");
     ret.append("): algo=" + std::string(palgo->name()));
-    ret.append(ssprintf(" workspace=%.2fMiB reproducible=%d",
+    ret.append(ssprintf(" workspace=%.2fMiB attribute=%u",
                         workspace / (1024 * 1024.0),
-                        palgo->contain_attribute(
-                                megdnn::AlgoAttribute::REPRODUCIBLE)));
+                        static_cast<uint32_t>(palgo->attribute())));
     mgb_log_debug("%s", ret.c_str());
 
     megdnn_opr->execution_policy() = policy;
@@ -432,41 +431,39 @@ size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
         ExeContext& ctx) {
-    using S = mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
     MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE);
-    switch (ctx.execution_policy().strategy) {
-        case S::HEURISTIC:
-            return ctx.choose_by_heuristic();
-        case S::HEURISTIC_REPRODUCIBLE:
-            return ctx.choose_by_heuristic(true);
-        case S::PROFILE_HEURISTIC: {
-            ImplExecutionPolicy policy = choose_by_profile(ctx, false, false);
-            if (!policy.algo.valid())
-                policy = ctx.choose_by_heuristic();
-            return policy;
-        }
+    auto opr_strategy = ctx.execution_policy().strategy;
+    if ((opr_strategy & ExecutionStrategy::HEURISTIC) &&
+               (opr_strategy & ExecutionStrategy::PROFILE)) {
+        ImplExecutionPolicy policy =
+                choose_by_profile(ctx, opr_strategy, false);
+        if (!policy.algo.valid())
+            policy = ctx.choose_by_heuristic(opr_strategy);
+        return policy;
+    } else if ((opr_strategy & ExecutionStrategy::HEURISTIC)) {
+        return ctx.choose_by_heuristic(opr_strategy);
+    }
 #if MGB_ENABLE_FASTRUN
-        case S::PROFILE:
-            return choose_by_profile(ctx, false);
-        case S::PROFILE_REPRODUCIBLE:
-            return choose_by_profile(ctx, true);
+    else if (opr_strategy & ExecutionStrategy::PROFILE) {
+        return choose_by_profile(ctx, opr_strategy);
+    }
 #endif
-        default:
-            mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
+    else {
+        mgb_throw(GraphError, "bad convolution ExecutionPolicy strategy");
     }
 }
 
-#define INST(Opr)                                                            \
-    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy                   \
-    AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx);                   \
-    template void AlgoChooser<megdnn::Opr>::profile(                         \
-            ExeContext& ctx, bool require_reproducible);                     \
-    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy                   \
-    AlgoChooser<megdnn::Opr>::choose_by_profile(                             \
-            ExeContext& ctx, bool require_reproducible, bool enable_update); \
-    template size_t AlgoChooser<megdnn::Opr>::setup_algo(                    \
-            const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr,      \
-            const MGBOpr* mgb_opr, bool allow_weight_preprocess);            \
+#define INST(Opr)                                                       \
+    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy              \
+    AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx);              \
+    template void AlgoChooser<megdnn::Opr>::profile(ExeContext& ctx,    \
+                                                    ExecutionStrategy); \
+    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy              \
+    AlgoChooser<megdnn::Opr>::choose_by_profile(                        \
+            ExeContext& ctx, ExecutionStrategy, bool enable_update);    \
+    template size_t AlgoChooser<megdnn::Opr>::setup_algo(               \
+            const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \
+            const MGBOpr* mgb_opr, bool allow_weight_preprocess);
 
 MGB_FOREACH_FASTRUN_OPR(INST)
 
@@ -498,7 +495,7 @@ AlgoChooser<Opr>::ExeContext::ExeContext(
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplAlgo
 AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
-        bool require_reproducible) const {
+        ExecutionStrategy select_strategy) const {
     MIDOUT_B(Opr,
              midout_iv(MGB_HASH_STR(
                      "AlgoChooser::ExeContext::get_profile_result_from_cache")))
@@ -522,7 +519,9 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
     if (prof.empty())
         return {};
     for (auto&& i : prof) {
-        if ((!require_reproducible || i.reproducible)) {
+        if (!(select_strategy & ExecutionStrategy::REPRODUCIBLE) ||
+            static_cast<AlgoAttribute>(i.attribute) &
+                    AlgoAttribute::REPRODUCIBLE) {
             auto iter = algo_map.find(i.algo);
             mgb_assert(iter != algo_map.end(),
                        "algorithm %s exists in "
@@ -550,7 +549,8 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
 
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
+AlgoChooser<Opr>::ExeContext::choose_by_heuristic(
+        ExecutionStrategy select_strategy) const {
     if (m_execution_policy.workspace_limit !=
         std::numeric_limits<decltype(
                 m_execution_policy.workspace_limit)>::max()) {
@@ -558,6 +558,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
                 "workspace_limit should not be setted if choose algo by "
                 "heuristic");
     }
+    bool reproducible = static_cast<bool>(select_strategy &
+                                          ExecutionStrategy::REPRODUCIBLE);
     auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
             owner_graph(), m_cn, m_execution_policy.workspace_limit);
     ImplExecutionPolicy policy;
@@ -579,7 +581,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
                 to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
                 _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
                 m_allow_weight_preprocess);
-        policy.sub_policy.push_back(sub_ctx.choose_by_heuristic(reproducible));
+        policy.sub_policy.push_back(
+                sub_ctx.choose_by_heuristic(select_strategy));
     });
 
     return policy;
@@ -588,9 +591,8 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(bool reproducible) const {
 template <typename Opr>
 std::vector<typename AlgoChooser<Opr>::ImplAlgo>
 AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
-    auto heu = choose_by_heuristic();
-    auto&& ret =
-            APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
+    auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC);
+    auto&& ret = APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
     bool found = false;
     for (size_t i = 0; i < ret.size(); ++i) {
         if (ret[i].desc == heu.algo) {
@@ -611,19 +613,21 @@ AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
 
 template <typename Opr>
 void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
-        bool require_reproducible,
+        ExecutionStrategy select_strategy,
         typename AlgoChooser<Opr>::ImplExecutionPolicy& policy,
         bool retrive_from_cache) const {
+    bool reproducible = static_cast<bool>(select_strategy &
+                                          ExecutionStrategy::REPRODUCIBLE);
     if (!policy.algo.valid()) {
         if (retrive_from_cache) {
             policy.algo =
-                    get_profile_result_from_cache(require_reproducible).desc;
+                    get_profile_result_from_cache(select_strategy).desc;
         } else {
             auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
                     owner_graph(), m_cn, m_execution_policy.workspace_limit);
             policy.algo = APPLY(m_megdnn_opr->get_algorithm_info_heuristic(
                                         args..., workspace_limit,
-                                        require_reproducible),
+                                        reproducible),
                                 m_layouts)
                                   .desc;
         }
@@ -647,7 +651,7 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
                 _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
                 m_allow_weight_preprocess);
         policy.sub_policy.push_back({});
-        sub_ctx.construct_execution_policy(require_reproducible,
+        sub_ctx.construct_execution_policy(select_strategy,
                                            policy.sub_policy.back(),
                                            retrive_from_cache);
     });
@@ -718,8 +722,7 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(
         return None;
     return AlgoChooserProfileCache::ResultEntry{
             palgo->name(),
-            palgo->contain_attribute(
-                    megdnn::AlgoAttribute::REPRODUCIBLE),
+            static_cast<uint32_t>(palgo->attribute()),
             rst.val().time, param.workspace};
 }
 
@@ -768,10 +771,10 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
             bool allow_weight_preprocess);                                     \
     template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy            \
     AlgoChooser<megdnn::Opr>::ExeContext::choose_by_heuristic(                 \
-            bool reproducible) const;                                          \
+            ExecutionStrategy select_strategy) const;                          \
     template typename AlgoChooser<megdnn::Opr>::ImplAlgo                       \
     AlgoChooser<megdnn::Opr>::ExeContext::get_profile_result_from_cache(       \
-            bool require_reproducible) const;                                  \
+            ExecutionStrategy select_strategy) const;                          \
     template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo>          \
     AlgoChooser<megdnn::Opr>::ExeContext::get_all_candidates() const;          \
     template size_t                                                            \
@@ -780,7 +783,7 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
                     policy) const;                                             \
     template void                                                              \
     AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy(          \
-            bool require_reproducible,                                         \
+            ExecutionStrategy select_strategy,                                 \
             typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy,    \
             bool retrive_from_cache) const;                                    \
     template Maybe<AlgoChooserProfileCache::ResultEntry>                       \
diff --git a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h
index c32dc6d2c2266b0888bdcfcf1fdf5d1a960b28bc..a9af2081373906423ec28d8914873692d1579a82 100644
--- a/src/opr/include/megbrain/opr/search_policy/algo_chooser.h
+++ b/src/opr/include/megbrain/opr/search_policy/algo_chooser.h
@@ -35,6 +35,13 @@ MGB_FOREACH_FASTRUN_OPR(cb)
 #undef cb
 
 namespace mgb {
+
+//! shorthand aliases for the bit-combination enums
+//! megdnn::param::ExecutionPolicy::Strategy and megdnn::AlgoAttribute
+using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy;
+
+using AlgoAttribute = megdnn::AlgoAttribute;
+
 namespace opr {
 
 /* =================== AlgoChooser =================== */
@@ -103,7 +110,7 @@ public:
         const FixedTensorLayouts& layouts() const { return m_layouts; }
 
         ImplExecutionPolicy choose_by_heuristic(
-                bool reproducible = false) const;
+                ExecutionStrategy select_strategy) const;
 
         //! get all candidate algos, and the one choose_by_heuristic() is
         //! put first
@@ -126,19 +133,20 @@ public:
                 const ImplExecutionPolicy& policy, double& timeout) const;
 
         //! get all profile algorithm from cache, return invalid if not exists
-        ImplAlgo get_profile_result_from_cache(bool require_reproducible) const;
+        ImplAlgo get_profile_result_from_cache(
+                ExecutionStrategy select_strategy) const;
 
         /**
          * \brief construct execution policy from cache or heuristic.
          *
-         * \param require_reproducible select algo which is reproducible
+         * \param select_strategy select an algo that matches this strategy
          * \param policy execution policy
          * \param retrive_from_cache retrive algo from cache if set True, get
          *     from heuristic otherwise.
          */
-        void construct_execution_policy(
-                bool require_reproducible, ImplExecutionPolicy& policy,
-                bool retrive_from_cache = true) const;
+        void construct_execution_policy(ExecutionStrategy select_strategy,
+                                        ImplExecutionPolicy& policy,
+                                        bool retrive_from_cache = true) const;
 
     private:
         Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const;
@@ -153,11 +161,11 @@ private:
 
 
     //! profile and save to cache
-    static void profile(ExeContext& ctx, bool require_reproducible);
+    static void profile(ExeContext& ctx, ExecutionStrategy select_strategy);
 
-    static ImplExecutionPolicy choose_by_profile(ExeContext& ctx,
-                                                 bool require_reproducible,
-                                                 bool enable_update = true);
+    static ImplExecutionPolicy choose_by_profile(
+            ExeContext& ctx, ExecutionStrategy select_strategy,
+            bool enable_update = true);
 
 public:
     /*!
diff --git a/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h b/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h
index bea97d1016e63b6c68d24182c468dd4634a067dc..0e94b6387e399f472cdadfa75d5060f4055add7c 100644
--- a/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h
+++ b/src/opr/include/megbrain/opr/search_policy/algo_chooser_helper.h
@@ -13,7 +13,6 @@
 #pragma once
 
 #include "megbrain/graph/operator_node.h"
-#include "megbrain/opr/param_defs.h"
 #include "megdnn/oprs/base.h"
 #include "megdnn/oprs/nn.h"
 
@@ -73,7 +72,6 @@ protected:
 
 };
 }  // namespace mixin
-
 }  // namespace opr
 }  // namespace mgb
 
diff --git a/src/opr/test/blas.cpp b/src/opr/test/blas.cpp
index 10ada0c9d6151d622fffb5460587f70584e1f985..e02f2b97ead8cd6139d02b575b0b760c6e6fead1 100644
--- a/src/opr/test/blas.cpp
+++ b/src/opr/test/blas.cpp
@@ -429,10 +429,11 @@ TEST(TestOprDNN, MatrixMulExePolicy) {
     auto cn = CompNode::load("cpux");
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+                        S::PROFILE | S::HEURISTIC}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy : {S::HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
 #endif
 
         auto graph = ComputingGraph::make();
diff --git a/src/opr/test/dnn/convolution.cpp b/src/opr/test/dnn/convolution.cpp
index 8b0b81fa0c6d5acf9a77655edbd7098d11bbb0fa..9e779f2719e97f77346b7306d8f9bba26f9562bf 100644
--- a/src/opr/test/dnn/convolution.cpp
+++ b/src/opr/test/dnn/convolution.cpp
@@ -355,11 +355,13 @@ TEST(TestOprDNN, ConvBiasExePolicy) {
     auto cn = CompNode::load("cpux");
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
-
         auto graph = ComputingGraph::make();
         HostTensorGenerator<> gen;
 
@@ -397,7 +399,8 @@ TEST(TestOprDNN, ConvBiasExePolicy_Quantized8Asym) {
 
     auto cn = CompNode::load("cpux");
 
-    for (auto strategy: {S::PROFILE, S::PROFILE_REPRODUCIBLE}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::PROFILE | S::REPRODUCIBLE}) {
 
         auto graph = ComputingGraph::make();
         HostTensorGenerator<> gen;
@@ -439,10 +442,12 @@ TEST(TestOprDNN, ConvolutionExePolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         using Checker = AutoOprChecker<2, 1>;
 
@@ -522,10 +527,11 @@ TEST(TestOprDNN, ConvolutionBackwardDataBfloat16ExePolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         {S::PROFILE, S::HEURISTIC, S(S::PROFILE | S::REPRODUCIBLE),
+          S(S::PROFILE | S::HEURISTIC)}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy : {S::HEURISTIC, S(S::PROFILE | S::HEURISTIC)}) {
 #endif
         using Checker = AutoOprChecker<2, 1>;
 
@@ -1183,9 +1189,12 @@ TEST(TestOprDNN, Convolution3DExePolicy) {
     using S = Policy::Strategy;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy: {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
 
         using Checker = AutoOprChecker<2, 1>;
@@ -1660,10 +1669,12 @@ TEST(TestOprDNN, LocalShareForwardExecPolicy) {
     PersistentCacheHook cache_hook{on_get};
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy: {S:HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -1769,10 +1780,12 @@ TEST(TestOprDNN, DeformableConvForward) {
     Param param;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -1936,10 +1949,12 @@ TEST(TestOprDNN, BatchConvBiasForward) {
     param.sparse = Param::Sparse::DENSE;
 
 #if MGB_ENABLE_FASTRUN
-    for (auto strategy : {S::PROFILE, S::HEURISTIC, S::PROFILE_REPRODUCIBLE,
-                          S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::PROFILE, S::HEURISTIC, S::PROFILE | S::REPRODUCIBLE,
+          S::PROFILE | S::HEURISTIC, S::PROFILE | S::OPTMIZED}) {
 #else
-    for (auto strategy : {S : HEURISTIC, S::PROFILE_HEURISTIC}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::PROFILE | S::HEURISTIC}) {
 #endif
 
         auto make_quantized = [&](SymbolVar x, const DType& dtype) {
@@ -2080,7 +2095,8 @@ TEST(TestOprDNN, HeuristicReproducible) {
 
     constexpr size_t PH = 1, PW = 1, SH = 1, SW = 1;
 
-    for (auto strategy : {S::HEURISTIC, S::HEURISTIC_REPRODUCIBLE}) {
+    for (auto strategy :
+         SmallVector<S>{S::HEURISTIC, S::HEURISTIC | S::REPRODUCIBLE}) {
         VarNode* bwd_flt;
         auto make_graph = [&](const Checker::SymInpArray& inputs)
                 -> Checker::SymOutArray {
@@ -2126,7 +2142,7 @@ TEST(TestOprDNN, HeuristicReproducible) {
             megdnn::Algorithm* palgo =
                     megdnn_opr->get_algorithm_from_desc(algo);
             mgb_assert(palgo, "Unknown algo description");
-            if (strategy == S::HEURISTIC_REPRODUCIBLE) {
+            if (strategy == S(S::HEURISTIC | S::REPRODUCIBLE)) {
                 EXPECT_TRUE(palgo->contain_attribute(
                             megdnn::AlgoAttribute::REPRODUCIBLE));
             }
diff --git a/test/src/include/megbrain/test/helper.h b/test/src/include/megbrain/test/helper.h
index e122a76112a0f1daaedd512b6c1bc8283fb4270e..92cc29be45bc6022c4a89b98da01aa522eef79a5 100644
--- a/test/src/include/megbrain/test/helper.h
+++ b/test/src/include/megbrain/test/helper.h
@@ -43,6 +43,7 @@ namespace megdnn {
             std::ostream &ostr, const DType &dt) {
         return ostr << dt.name();
     }
+
 } // namespace megdnn
 
 namespace mgb {
diff --git a/tools/param_defs/mgb_opr_param_defs.py b/tools/param_defs/mgb_opr_param_defs.py
index c5f2cf2d9455a0ab9990add8f5bf20daacfe1205..d8fd2026e242be0a281a25c833590ec2cb314722 100644
--- a/tools/param_defs/mgb_opr_param_defs.py
+++ b/tools/param_defs/mgb_opr_param_defs.py
@@ -18,7 +18,7 @@ pdef('PersistentOutputStorage').add_fields(
  add_const('int32', 'INVALID_AXIS', 'MAX_NDIM').
  add_fields('int32', 'axis', 'INVALID_AXIS'))
 
-(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator').
+(pdef('ExecutionPolicy', version=0, is_legacy=True).
  add_enum('Strategy',
           Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
           Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
@@ -33,6 +33,20 @@ pdef('PersistentOutputStorage').add_fields(
             Doc('workspace_limit', 'workspace limit in bytes'),
             str(2**64-1)+'ull'))
 
+(pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator', version=1).
+ add_bit_combination_enum('Strategy',
+          Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
+          Doc('PROFILE',
+              'run possible algorithms on real device to find the best'),
+          Doc('REPRODUCIBLE',
+              'when profile or heuristic algo selection it require the algos '
+              'must be reproducible'),
+          Doc('OPTMIZED',
+              'profile require algos are optimized to achieve fast-profile')).
+ add_fields('uint64',
+            Doc('workspace_limit', 'workspace limit in bytes'),
+            str(2**64-1)+'ull'))
+
 (pdef('AssertEqual').
  add_fields('float32',
             Doc('maxerr', 'max allowed error; error is defined as the minimal '