diff --git a/lite/load_and_run/src/models/model.cpp b/lite/load_and_run/src/models/model.cpp
index c135b899e0a451032da3967faa53b4fb3f017aee..72b3e857fbf6fefd0c3be98584dd56631872db3e 100644
--- a/lite/load_and_run/src/models/model.cpp
+++ b/lite/load_and_run/src/models/model.cpp
@@ -36,16 +36,19 @@
 std::shared_ptr<ModelBase> ModelBase::create_model(std::string model_path) {
     auto model_type = get_model_type(model_path);
-    if (ModelType::LITE_MODEL == model_type) {
+    if (FLAGS_lite) {
+        mgb_log("run model force lite mode\n");
+        return std::make_shared<ModelLite>(model_path);
+    } else if (FLAGS_mdl) {
+        mgb_log("run model force mdl mode\n");
+        return std::make_shared<ModelMdl>(model_path);
+    } else if (ModelType::LITE_MODEL == model_type) {
         return std::make_shared<ModelLite>(model_path);
-    } else if (ModelType::MEGDL_MODEL == model_type) {
-        if (FLAGS_lite)
-            return std::make_shared<ModelLite>(model_path);
-        else
-            return std::make_shared<ModelMdl>(model_path);
     } else {
-        return nullptr;
+        mgb_assert(ModelType::MEGDL_MODEL == model_type);
+        return std::make_shared<ModelMdl>(model_path);
     }
 }
 
 DEFINE_bool(lite, false, "use megengine lite interface to run model");
+DEFINE_bool(mdl, false, "use megengine mdl interface to run model");
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/lite/load_and_run/src/models/model.h b/lite/load_and_run/src/models/model.h
index d1a017754440c3441d71c91ebdca9694f32e7991..da450490d1b8ed42d6c6d6d71742e0171edf7060 100644
--- a/lite/load_and_run/src/models/model.h
+++ b/lite/load_and_run/src/models/model.h
@@ -4,6 +4,7 @@
 #include "helpers/common.h"
 #include "megbrain/utils/json.h"
 DECLARE_bool(lite);
+DECLARE_bool(mdl);
 
 namespace lar {
 /*!
diff --git a/lite/load_and_run/src/models/model_mdl.h b/lite/load_and_run/src/models/model_mdl.h
index 868984b1e888ef486d24c39e6b4b14432d2a818b..43674a24efd35bacf741b152eb511d970a034ad8 100644
--- a/lite/load_and_run/src/models/model_mdl.h
+++ b/lite/load_and_run/src/models/model_mdl.h
@@ -42,6 +42,8 @@ public:
         return m_load_result;
     }
 
+    void update_mdl_load_result(const mgb::SymbolVarArray& output_var_array);
+
     //! get load config for megDL model
     mgb::serialization::GraphLoadConfig& get_mdl_config() { return m_load_config; }
 
diff --git a/lite/load_and_run/src/options/fastrun_options.cpp b/lite/load_and_run/src/options/fastrun_options.cpp
index 8875a815dd74b8e4b060727da1fa1cbce075f46a..043f178221b19370a4a6598bbc960a100e4acdbe 100644
--- a/lite/load_and_run/src/options/fastrun_options.cpp
+++ b/lite/load_and_run/src/options/fastrun_options.cpp
@@ -31,6 +31,13 @@ void FastRunOption::config_model_internel<ModelLite>(
         LITE_LOG("enable fast-run strategy for algo profile");
         strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_PROFILE) |
                    static_cast<uint32_t>(Strategy::LITE_ALGO_OPTIMIZED) | strategy;
+    } else if ((!m_fast_run_cache.empty() &&
+                !access(m_fast_run_cache.c_str(), F_OK))) {
+        LITE_LOG(
+                "detect fast-run cache usable set LITE_ALGO_PROFILE for algo "
+                "profile");
+        strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_PROFILE) |
+                   static_cast<uint32_t>(Strategy::LITE_ALGO_HEURISTIC) | strategy;
     } else {
         strategy = static_cast<uint32_t>(Strategy::LITE_ALGO_HEURISTIC) | strategy;
     }
diff --git a/lite/load_and_run/src/options/optimize_options.cpp b/lite/load_and_run/src/options/optimize_options.cpp
index b1506cc25c745d3c3c53a27df355daf2f04715be..6ff868b6ed28f5f52b22fd7c98b114020ccce116 100644
--- a/lite/load_and_run/src/options/optimize_options.cpp
+++ b/lite/load_and_run/src/options/optimize_options.cpp
@@ -299,6 +299,75 @@ void FuseConvBiasElemwiseAddOption::config_model(
     CONFIG_MODEL_FUN;
 }
 
+///////////////////////// optimize for inference options ///////////////
+bool OptimizeForInferenceOption::m_valid;
+namespace lar {
+template <>
+void OptimizeForInferenceOption::config_model_internel<ModelLite>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelLite> model) {
+    LITE_MARK_USED_VAR(model);
+    if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
+        auto optimize_for_infer =
+                std::static_pointer_cast<lar::Bool>(m_option["optimize_for_inference"])
+                        ->get_value();
+        if (optimize_for_infer) {
+            LITE_THROW(
+                    "optimize for inference not supported in lite "
+                    "model");
+        }
+    }
+}
+
+template <>
+void OptimizeForInferenceOption::config_model_internel<ModelMdl>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
+    if (runtime_param.stage == RunStage::AFTER_MODEL_LOAD) {
+        auto optimize_for_infer =
+                std::static_pointer_cast<lar::Bool>(m_option["optimize_for_inference"])
+                        ->get_value();
+        if (optimize_for_infer) {
+            mgb_log("enable optimize for inference optimization");
+            auto&& load_result = model->get_mdl_load_result();
+            mgb::cg::GraphCommonOptimizeOptions opt =
+                    model->get_mdl_load_result().graph->options().graph_opt;
+            auto inference_opt2 = mgb::gopt::OptimizeForInferenceOptions(opt);
+            auto output_var_list = mgb::gopt::optimize_for_inference(
+                    load_result.output_var_list, inference_opt2);
+            model->get_mdl_load_result().update_output_var_list(output_var_list);
+            model->get_mdl_load_result().graph->options().graph_opt.clear();
+        }
+    }
+}
+}  // namespace lar
+
+void OptimizeForInferenceOption::update() {
+    m_option_name = "optimize_for_inference";
+    m_option = {{"optimize_for_inference", lar::Bool::make(false)}};
+    std::static_pointer_cast<lar::Bool>(m_option["optimize_for_inference"])
+            ->set_value(FLAGS_optimize_for_inference);
+}
+
+bool OptimizeForInferenceOption::is_valid() {
+    bool ret = FLAGS_optimize_for_inference;
+    return ret || m_valid;
+}
+
+std::shared_ptr<OptionBase> OptimizeForInferenceOption::create_option() {
+    static std::shared_ptr<OptimizeForInferenceOption> option(
+            new OptimizeForInferenceOption);
+    if (OptimizeForInferenceOption::is_valid()) {
+        option->update();
+        return option;
+    } else {
+        return nullptr;
+    }
+}
+
+void OptimizeForInferenceOption::config_model(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
+    CONFIG_MODEL_FUN;
+}
+
 ///////////////////////// graph retrict options /////////////////////////
 bool GraphRecordOption::m_valid;
 namespace lar {
@@ -646,6 +715,9 @@ DEFINE_bool(
         enable_fuse_conv_bias_with_z, false,
         "fuse conv, bias (elemwise add), z(elemwise add) into one opr "
         "(only support on GPU)");
+DEFINE_bool(
+        optimize_for_inference, false,
+        "whether to optimize_for_inference, fuse bn and many base optimize");
 
 ///////////////////////// graph retrict options /////////////////////////
 DEFINE_bool(
@@ -699,6 +771,11 @@ REGIST_OPTION_CREATOR(
         fuse_conv_bias_nonlinearity, lar::FuseConvBiasNonlinearOption::create_option);
 REGIST_OPTION_VALIDATER(
         fuse_conv_bias_nonlinearity, lar::FuseConvBiasNonlinearOption::set_valid);
+REGIST_OPTION_CREATOR(
+        optimize_for_inference, lar::OptimizeForInferenceOption::create_option);
+REGIST_OPTION_VALIDATER(
+        optimize_for_inference, lar::OptimizeForInferenceOption::set_valid);
+
 REGIST_OPTION_CREATOR(
         fuse_conv_bias_with_z, lar::FuseConvBiasElemwiseAddOption::create_option);
 REGIST_OPTION_VALIDATER(
diff --git a/lite/load_and_run/src/options/optimize_options.h b/lite/load_and_run/src/options/optimize_options.h
index 348733214da093d1be7391332e6199ed24de1fe9..bebf4ac716e968068c3a505d0eef7fa0aae9555b 100644
--- a/lite/load_and_run/src/options/optimize_options.h
+++ b/lite/load_and_run/src/options/optimize_options.h
@@ -5,6 +5,7 @@
 #include "option_base.h"
 
 DECLARE_bool(enable_fuse_preprocess);
+DECLARE_bool(optimize_for_inference);
 DECLARE_bool(fuse_grain);
 DECLARE_bool(weight_preprocess);
 DECLARE_bool(enable_fuse_conv_bias_nonlinearity);
@@ -216,6 +217,34 @@
 private:
     uint64_t workspace_limit;
 };
 
+///////////////////////// optimize for inference options /////////////////////////
+class OptimizeForInferenceOption final : public OptionBase {
+public:
+    static bool is_valid();
+
+    static std::shared_ptr<OptionBase> create_option();
+
+    void config_model(
+            RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
+
+    static void set_valid(bool val) { m_valid = val; }
+
+    std::string option_name() const override { return m_option_name; };
+
+    OptionValMap* get_option() override { return &m_option; }
+
+    void update() override;
+
+private:
+    OptimizeForInferenceOption() = default;
+    template <typename ModelImpl>
+    void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>){};
+
+    std::string m_option_name;
+    static bool m_valid;
+    OptionValMap m_option;
+};
+
 ///////////////////////// other options for optimization /////////////////
 class JITOption final : public OptionBase {
 public:
diff --git a/lite/src/mge/network_impl.cpp b/lite/src/mge/network_impl.cpp
index 70f74b6ea4159a9790c319f5b0529003793351fc..5aec5beb8bb2059dd1989909fb606faa8577d5d9 100644
--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -366,19 +366,7 @@ void NetworkImplDft::layout_transform_optimization() {
         mgb::ThinHashMap<mgb::SymbolVar, mgb::SymbolVar> out_var_map;
         auto output_var_array = mgb::gopt::layout_transform(
                 m_load_result.output_var_list, m_layout_transform_target);
-        // replace symvar in output_var_list
-        for (size_t idx = 0; idx < output_var_array.size(); ++idx) {
-            out_var_map[m_load_result.output_var_list[idx]] = output_var_array[idx];
-            m_load_result.output_var_list[idx] = output_var_array[idx];
-        }
-        // replace symvar in output_var_map_id
-        for (auto&& item : m_load_result.output_var_map_id) {
-            item.second = out_var_map[item.second];
-        }
-        // replace symvar in output_var_map
-        for (auto&& item : m_load_result.output_var_map) {
-            item.second = out_var_map[item.second];
-        }
+        m_load_result.update_output_var_list(output_var_array);
     } else if (m_user_config->auto_optimize_inference) {
         //! set model weight preprocess
         m_load_config.comp_graph->options().graph_opt.weight_preprocess = true;
diff --git a/lite/test/test_common.h b/lite/test/test_common.h
index 95a784d428c2b3f1c21b14b429b5aff224fb0743..cc02fc69d4f47f92ea7e1392d183075cce7954e5 100644
--- a/lite/test/test_common.h
+++ b/lite/test/test_common.h
@@ -8,6 +8,7 @@
 #include "../src/misc.h"
 #include "lite/network.h"
 #include "lite/tensor.h"
+#include "megbrain/comp_node.h"
 #include "megbrain/graph/bases.h"
 #include "megbrain/plugin/opr_io_dump.h"
 #include "megbrain/plugin/profiler.h"
@@ -167,4 +168,18 @@
 
 #endif
 
+static inline bool check_gpu_available(size_t num) {
+    if (mgb::CompNode::get_device_count(mgb::CompNode::DeviceType::CUDA) < num) {
+        mgb_log_warn("skip test case that requires %zu GPU(s)", num);
+        return false;
+    }
+    return true;
+}
+#define REQUIRE_CUDA()                 \
+    {                                  \
+        if (!check_gpu_available(1)) { \
+            return;                    \
+        }                              \
+    }                                  \
+    while (0)
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/lite/test/test_lar_options.cpp b/lite/test/test_lar_options.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5093ccb736d58070e1fa24140267a269a95766f4
--- /dev/null
+++ b/lite/test/test_lar_options.cpp
@@ -0,0 +1,51 @@
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+#include <string>
+#include "test_common.h"
+#include "test_options.h"
+
+using namespace lar;
+DECLARE_bool(lite);
+DECLARE_bool(cpu);
+DECLARE_bool(optimize_for_inference);
+#if LITE_WITH_CUDA
+DECLARE_bool(cuda);
+#endif
+
+namespace {
+
+BOOL_OPTION_WRAP(optimize_for_inference);
+
+BOOL_OPTION_WRAP(lite);
+BOOL_OPTION_WRAP(cpu);
+#if LITE_WITH_CUDA
+BOOL_OPTION_WRAP(cuda);
+#endif
+}  // anonymous namespace
+
+TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE) {
+    DEFINE_WRAP(cpu);
+    std::string model_path = "./shufflenet.mge";
+
+    TEST_BOOL_OPTION(optimize_for_inference);
+}
+
+#if LITE_WITH_OPENCL
+TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE_OPENCL) {
+    REQUIRE_OPENCL();
+    DEFINE_WRAP(opencl);
+    std::string model_path = "./shufflenet.mge";
+
+    TEST_BOOL_OPTION(optimize_for_inference);
+}
+#endif
+
+#if LITE_WITH_CUDA
+TEST(TestLarOption, OPTIMIZE_FOR_INFERENCE_CUDA) {
+    REQUIRE_CUDA();
+    DEFINE_WRAP(cuda);
+    std::string model_path = "./shufflenet.mge";
+
+    TEST_BOOL_OPTION(optimize_for_inference);
+}
+#endif
diff --git a/lite/test/test_layout_options.cpp b/lite/test/test_layout_options.cpp
index 987a252d99756dce5be7d6fe85536b92478fce86..be9f579755743fe3358c50c2b9adf5d0ae4a6e0a 100644
--- a/lite/test/test_layout_options.cpp
+++ b/lite/test/test_layout_options.cpp
@@ -1,6 +1,7 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <string>
+#include "test_common.h"
 #include "test_options.h"
 
 using namespace lar;
diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h
index 86e1b12fbcb5ff1f09e9640fa3a9944a2c88ba2f..2d545bcb9361a3265433a2823b9fa2dd05b9b3e2 100644
--- a/src/core/include/megbrain/graph/cg.h
+++ b/src/core/include/megbrain/graph/cg.h
@@ -109,6 +109,16 @@ struct GraphCommonOptimizeOptions {
                     ///< support on Nvidia GPU
     };
     LayoutTransform layout_transform = LayoutTransform::DEFAULT;
+    void clear() {
+        f16_io_f32_comp = false;
+        f16_io_comp = false;
+        fuse_conv_bias_nonlinearity = false;
+        fuse_conv_bias_with_z = false;
+        weight_preprocess = false;
+        fuse_preprocess = false;
+        fuse_grain = false;
+        layout_transform = LayoutTransform::DEFAULT;
+    }
 
 #define SET(n)                                     \
     GraphCommonOptimizeOptions& enable_##n() {     \
diff --git a/src/gopt/include/megbrain/gopt/inference.h b/src/gopt/include/megbrain/gopt/inference.h
index 791f5a8ee2f88fdf58f658b9c9d850abbbcc7796..b3cd9702f93c5dcbcac8d46f3de70af6d2d1ba5b 100644
--- a/src/gopt/include/megbrain/gopt/inference.h
+++ b/src/gopt/include/megbrain/gopt/inference.h
@@ -312,6 +312,9 @@
 };
 
 struct OptimizeForInferenceOptions : cg::GraphCommonOptimizeOptions {
+    OptimizeForInferenceOptions() = default;
+    OptimizeForInferenceOptions(const cg::GraphCommonOptimizeOptions& opt)
+            : cg::GraphCommonOptimizeOptions(opt){};
     uint64_t serialize() {
         uint64_t ret = 0;
         ret |= (uint64_t)layout_transform << 32;
diff --git a/src/serialization/impl/serializer.cpp b/src/serialization/impl/serializer.cpp
index 5ec543016313e2bf5a30e25c0254649a60c23935..b44800cd4f7837ccd18263f0fb6a02124f704112 100644
--- a/src/serialization/impl/serializer.cpp
+++ b/src/serialization/impl/serializer.cpp
@@ -17,6 +17,25 @@ std::unique_ptr<cg::AsyncExecutable> GraphLoader::LoadResult::graph_compile(
     return ret;
 }
 
+void GraphLoader::LoadResult::update_output_var_list(
+        const SymbolVarArray& output_var_array) {
+    mgb::ThinHashMap<SymbolVar, SymbolVar> out_var_map;
+    mgb_assert(output_var_array.size() == output_var_list.size());
+    // replace symvar in output_var_list
+    for (size_t idx = 0; idx < output_var_array.size(); ++idx) {
+        out_var_map[output_var_list[idx]] = output_var_array[idx];
+        output_var_list[idx] = output_var_array[idx];
+    }
+    // replace symvar in output_var_map_id
+    for (auto&& item : output_var_map_id) {
+        item.second = out_var_map[item.second];
+    }
+    // replace symvar in output_var_map
+    for (auto&& item : output_var_map) {
+        item.second = out_var_map[item.second].rename(item.first);
+    }
+}
+
 void GraphLoader::LoadResult::graph_compile_ahead() {
     //! when force_output_use_user_specified_memory is set, the output var may
     //! be changed by gopt, then the var in LoadResult can not exist, so here
diff --git a/src/serialization/include/megbrain/serialization/serializer.h b/src/serialization/include/megbrain/serialization/serializer.h
index bfacede4b204506ff7c2b3986fc21b81010f7f73..4297ea9f4bcaa2c721c44141a0aae4b020cc8496 100644
--- a/src/serialization/include/megbrain/serialization/serializer.h
+++ b/src/serialization/include/megbrain/serialization/serializer.h
@@ -45,6 +45,13 @@
         //! GraphDumper::dump
         SymbolVarArray output_var_list;
 
+        /**
+         * \brief update output_var_list, output_var_map and output_var_map_id
+         * with the given output_var_array
+         */
+        MGE_WIN_DECLSPEC_FUC void update_output_var_list(
+                const SymbolVarArray& output_var_array);
+
         /*!
         * \brief call graph->compile() but also checks for comp seq rec
        *
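
Note (illustrative, not part of the patch): the new GraphLoader::LoadResult::update_output_var_list centralizes the symbol-variable replacement that network_impl.cpp previously did by hand, so a pass that rewrites the output vars can keep output_var_list, output_var_map and output_var_map_id consistent with one call. The sketch below shows the call pattern this patch uses in OptimizeForInferenceOption for the MDL model; the helper name run_optimize_for_inference and the surrounding setup are hypothetical, and error handling is omitted.

    #include "megbrain/gopt/inference.h"
    #include "megbrain/serialization/serializer.h"

    // Hypothetical helper mirroring what OptimizeForInferenceOption does after
    // model load: seed the options from the graph's current graph_opt flags,
    // run gopt::optimize_for_inference, then sync the load result and clear
    // graph_opt so the same passes are not applied again at compile time.
    mgb::SymbolVarArray run_optimize_for_inference(
            mgb::serialization::GraphLoader::LoadResult& load_result) {
        using namespace mgb;
        gopt::OptimizeForInferenceOptions opt(
                load_result.graph->options().graph_opt);  // new converting ctor
        auto new_outputs =
                gopt::optimize_for_inference(load_result.output_var_list, opt);
        load_result.update_output_var_list(new_outputs);  // new helper
        load_result.graph->options().graph_opt.clear();   // new clear()
        return load_result.output_var_list;
    }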
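Note (illustrative, not part of the patch): REQUIRE_CUDA(), added to lite/test/test_common.h, returns from the enclosing test early (with a warning log) when no CUDA device is present, and check_gpu_available(n) can be called directly to gate cases that need several devices. A minimal hypothetical test:

    #include <gtest/gtest.h>
    #include "test_common.h"

    TEST(TestExample, HypotheticalCudaOnlyCase) {
        REQUIRE_CUDA();                 // return early when no CUDA device exists
        if (!check_gpu_available(2)) {  // gate a two-GPU scenario
            return;
        }
        // ... CUDA-specific checks would go here ...
    }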