diff --git a/lite/include/lite/network.h b/lite/include/lite/network.h
index 881ed7774ed5371ed1880585bb8ebdf7e0c31465..b06efa0d5e639663c49bc4688c7a55b3fc569402 100644
--- a/lite/include/lite/network.h
+++ b/lite/include/lite/network.h
@@ -114,6 +114,9 @@ struct LITE_API Options {
  * model is not pack json information data inside
  *
  * @param options configuration of Options
+ *
+ * @param auto_optimize_inference lite will detect the device information and
+ * set the options heuristically
  */
 struct LITE_API Config {
     bool has_compression = false;
@@ -122,6 +125,7 @@ struct LITE_API Config {
     LiteBackend backend = LiteBackend::LITE_DEFAULT;
     std::string bare_model_cryption_name = {};
     Options options = {};
+    bool auto_optimize_inference = false;
 };
 
 /*!
diff --git a/lite/lite-c/include/lite-c/network_c.h b/lite/lite-c/include/lite-c/network_c.h
index ddc1bb882c1c768cd1db97ad431147ef2aa92068..7634304a3caa07ca6a6b6f2d97432c16dce3c14a 100644
--- a/lite/lite-c/include/lite-c/network_c.h
+++ b/lite/lite-c/include/lite-c/network_c.h
@@ -100,6 +100,9 @@ extern LITE_API const LiteOptions default_option;
  *
  *\param has_compression flag whether the model is compressed, the compress
  *method will read form the model
+ *
+ *\param auto_optimize_inference lite will detect the device information and
+ * set the options heuristically
  */
 typedef struct LiteConfig {
     int has_compression;
@@ -108,6 +111,7 @@ typedef struct LiteConfig {
     LiteBackend backend;
     const char* bare_model_cryption_name;
     LiteOptions options;
+    int auto_optimize_inference;
 } LiteConfig;
 
 //! get default config
diff --git a/lite/lite-c/src/network.cpp b/lite/lite-c/src/network.cpp
index 6d936ab66c43e608ca3428a08f7d649f2e69705b..7419d7a81d30dd06ca714ff1fbeaa0183302e7ac 100644
--- a/lite/lite-c/src/network.cpp
+++ b/lite/lite-c/src/network.cpp
@@ -42,7 +42,8 @@ LiteConfig default_config_t = {
         .device_type = LiteDeviceType::LITE_CPU,
         .backend = LiteBackend::LITE_DEFAULT,
         .bare_model_cryption_name = nullptr,
-        .options = default_option};
+        .options = default_option,
+        .auto_optimize_inference = false};
 
 LiteConfig* default_config() {
     return &default_config_t;
 }
@@ -133,6 +134,8 @@ lite::Config convert_to_lite_config(const LiteConfig c_config) {
     lite_config.options.enable_nchw32 = c_config.options.enable_nchw32;
     lite_config.options.enable_nchw64 = c_config.options.enable_nchw64;
 
+    lite_config.auto_optimize_inference = c_config.auto_optimize_inference;
+
     return lite_config;
 }
 
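For reference, the C++ entry point for the new option is just the extra field on lite::Config. Below is a minimal usage sketch; the model path and input name are placeholders, and the call sequence simply mirrors the test added at the end of this patch:

    #include "lite/network.h"
    #include "lite/tensor.h"

    #include <memory>

    int main() {
        // Enable the heuristic, device-aware optimization added by this patch.
        lite::Config config;
        config.auto_optimize_inference = true;

        // "./shufflenet.mge" and "data" are placeholders for a real model and input name.
        auto network = std::make_shared<lite::Network>(config);
        network->load_model("./shufflenet.mge");

        // Fill the input tensor as usual, then run. Layout and weight-preprocess
        // options are chosen automatically at load time when the flag is set.
        auto input = network->get_io_tensor("data");
        // ... copy user data into `input` here ...
        network->forward();
        network->wait();
        return 0;
    }
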
diff --git a/lite/pylite/megenginelite/network.py b/lite/pylite/megenginelite/network.py
index f2b270d3cab0d4de96f1ad81c30b45fdf8075d9c..a66bb94d3896483b825daad09f7dc16fe9609eb0 100644
--- a/lite/pylite/megenginelite/network.py
+++ b/lite/pylite/megenginelite/network.py
@@ -171,15 +171,18 @@ class LiteConfig(Structure):
 
         options: configuration of Options
 
+        auto_optimize_inference: lite will detect the device information and set the options heuristically
+
     Examples:
 
         .. code-block::
 
             from megenginelite import *
             config = LiteConfig()
-            config.has_compression = false
+            config.has_compression = False
             config.device_type = LiteDeviceType.LITE_CPU
             config.backend = LiteBackend.LITE_DEFAULT
             config.bare_model_cryption_name = "AES_default".encode("utf-8")
+            config.auto_optimize_inference = False
     """
 
     _fields_ = [
@@ -189,6 +192,7 @@ class LiteConfig(Structure):
         ("backend", c_int),
         ("_bare_model_cryption_name", c_char_p),
         ("options", LiteOptions),
+        ("auto_optimize_inference", c_int),
     ]
 
     def __init__(self, device_type=LiteDeviceType.LITE_CPU, option=None):
@@ -202,6 +206,7 @@ class LiteConfig(Structure):
         self.use_loader_dynamic_param = 0
         self.has_compression = 0
         self.backend = LiteBackend.LITE_DEFAULT
+        self.auto_optimize_inference = 0
 
     @property
     def bare_model_cryption_name(self):
@@ -223,6 +228,7 @@ class LiteConfig(Structure):
             "backend": LiteBackend(self.backend),
             "bare_model_cryption_name": self.bare_model_cryption_name,
             "options": self.options,
+            "auto_optimize_inference": self.auto_optimize_inference,
         }
         return data.__repr__()
 
diff --git a/lite/src/mge/network_impl.cpp b/lite/src/mge/network_impl.cpp
index 04065de74e16f881e4830fa6327d9fff7845a2cf..70f74b6ea4159a9790c319f5b0529003793351fc 100644
--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -21,6 +21,10 @@
 #include "megcore_opencl.h"
 #endif
 
+#if defined(MGB_ENABLE_CPUINFO_CHECK) && MGB_ENABLE_CPUINFO
+#include "cpuinfo.h"
+#endif
+
 #include
 #include
 #include
@@ -42,14 +46,7 @@ void NetworkImplDft::shared_weight_with(const NetworkImplBase* src_network) {
     LITE_ASSERT(src_impl.m_loader, "Clone network must after the network is loaded.");
     m_load_result = src_impl.m_loader->load(m_load_config, true);
 
-    //! flag weather the mode is cross compnode model
-    cross_compnode_model_detect();
-
-    //! update the IO of the network
-    update_io();
-
-    //! replace the IO when there is device input or output
-    compile_graph();
+    configure_after_loaded();
 }
 
 void NetworkImplDft::application_config() {
@@ -364,7 +361,7 @@ void NetworkImplDft::adapt_option_valid() {
     }
 }
 
-void NetworkImplDft::global_layout_transform() {
+void NetworkImplDft::layout_transform_optimization() {
     if (m_set_layout_transform) {
         mgb::ThinHashMap<mgb::SymbolVar, mgb::SymbolVar> out_var_map;
         auto output_var_array = mgb::gopt::layout_transform(
@@ -382,6 +379,103 @@
         for (auto&& item : m_load_result.output_var_map) {
             item.second = out_var_map[item.second];
         }
+    } else if (m_user_config->auto_optimize_inference) {
+        //! set model weight preprocess
+        m_load_config.comp_graph->options().graph_opt.weight_preprocess = true;
+        LITE_LOG(
+                "weight_preprocess is enabled, this may use more memory during "
+                "inference.");
+        //! get the current format and data type of the model
+        bool is_model_nchw = true;
+        //! whether any convolution is int8
+        bool is_model_int8 = false;
+        //! whether all convolutions are float32
+        bool is_model_float32 = true;
+        float conv_cnt = 0;
+        float dimshuffle_cnt = 0;
+
+        auto detect_int8_model = [&](const VarNode* input) {
+            if (input->dtype().enumv() == megdnn::DTypeEnum::QuantizedS8 ||
+                input->dtype().enumv() == megdnn::DTypeEnum::Quantized8Asymm) {
+                is_model_int8 = true;
+                is_model_float32 = false;
+            } else if (input->dtype().enumv() == megdnn::DTypeEnum::Float32) {
+                is_model_float32 = (is_model_float32 && true);
+            } else {
+                is_model_float32 = false;
+            }
+        };
+
+        cg::DepOprIter dep([&](cg::OperatorNodeBase* opr) {
+            if (auto conv = opr->try_cast_final<opr::ConvolutionForward>()) {
+                if (conv->param().format != megdnn::param::ConvBias::Format::NCHW) {
+                    is_model_nchw = false;
+                }
+                conv_cnt++;
+                detect_int8_model(conv->input(0));
+            } else if (auto conv_bias = opr->try_cast_final<opr::ConvBiasForward>()) {
+                if (conv_bias->param().format !=
+                    megdnn::param::ConvBias::Format::NCHW) {
+                    is_model_nchw = false;
+                }
+                conv_cnt++;
+                detect_int8_model(conv_bias->input(0));
+            } else if (auto dimshuffle = opr->try_cast_final<opr::Dimshuffle>()) {
+                LITE_MARK_USED_VAR(dimshuffle);
+                dimshuffle_cnt++;
+            }
+        });
+        for (auto&& i : m_load_result.output_var_list)
+            dep.add(i);
+
+        float ratio_dimshuffle_conv = 0;
+        if (conv_cnt > 0) {
+            ratio_dimshuffle_conv = dimshuffle_cnt / conv_cnt;
+        }
+        //! format optimization can only be applied to NCHW models,
+        //! shufflenet-like models will hurt the performance when using nchw88 or
+        //! nchw44 format, here we just heuristically gate on the ratio of
+        //! dimshuffle to convolution operators
+        if (!is_model_nchw || ratio_dimshuffle_conv > 0.15f) {
+            return;
+        }
+
+        //! determine the layout by the device information
+        //! TODO: shufflenet-like models using nchw88 or nchw44 will hurt the
+        //! performance
+        if (m_user_config->device_type == LITE_CPU) {
+#if defined(MGB_ENABLE_CPUINFO_CHECK) && MGB_ENABLE_CPUINFO
+            cpuinfo_initialize();
+            //! if all convolution and matmul data types are float32
+            if (is_model_float32) {
+                //! if device is x86
+                //! if x86 support avx, use format nchw88
+                if (cpuinfo_has_x86_avx()) {
+                    m_load_config.comp_graph->options().graph_opt.enable_nchw88();
+                    LITE_LOG("Configure model inference with nchw88 format.");
+                } else if (cpuinfo_has_x86_sse2() && !cpuinfo_has_x86_sse3()) {
+                    //! if x86 only support sse2, use format nchw44
+                    m_load_config.comp_graph->options().graph_opt.enable_nchw44();
+                    LITE_LOG("Configure model inference with nchw44 format.");
+                } else if (cpuinfo_has_arm_neon()) {
+                    //! if device is arm, use format nchw44
+                    m_load_config.comp_graph->options().graph_opt.enable_nchw44();
+                    LITE_LOG("Configure model inference with nchw44 format.");
+                }
+            } else if (is_model_int8) {
+                //! if data type of convolution is int8
+                //! if device is arm and support dot, use nchw44-dot format
+                if (cpuinfo_has_arm_neon() && cpuinfo_has_arm_neon_dot()) {
+                    m_load_config.comp_graph->options().graph_opt.enable_nchw44_dot();
+                    LITE_LOG("Configure model inference with nchw44-dot format.");
+                } else if (cpuinfo_has_arm_neon()) {
+                    //! if device is arm and do not support dot, use nchw44 format
+                    m_load_config.comp_graph->options().graph_opt.enable_nchw44();
+                    LITE_LOG("Configure model inference with nchw44 format.");
+                }
+            }
+#endif
+        }
     }
 }
 
@@ -422,10 +516,13 @@ void NetworkImplDft::load_model(
     }
 
     m_load_result = m_loader->load(m_load_config, true);
 
+    configure_after_loaded();
+}
+
+void NetworkImplDft::configure_after_loaded() {
     modify_exection_policy();
 
-    global_layout_transform();
+    layout_transform_optimization();
 
     //! some optimization option maybe invalid in some case, so here just
     //! auto determine whether some options will apply.
diff --git a/lite/src/mge/network_impl.h b/lite/src/mge/network_impl.h
index 648115016b3f0c7d031ba2334db9c444b253a4b3..02999e23e47ecc24b5ad9fd49ea06ec7d6974baf 100644
--- a/lite/src/mge/network_impl.h
+++ b/lite/src/mge/network_impl.h
@@ -178,8 +178,10 @@ private:
     //! call_back to the outputspec
     void make_output_spec();
 
-    //! do the global layout transform for the given platform target
-    void global_layout_transform();
+    //! do the layout transform for the given platform target, either the global
+    //! layout optimization or a heuristic choice of the best layout according to
+    //! the device information
+    void layout_transform_optimization();
 
     //! modify the execution policy
     void modify_exection_policy();
@@ -223,6 +225,9 @@ private:
     //! adapt option valid, it should call after update_io
     void adapt_option_valid();
 
+    //! configure and optimize the network after it is loaded
+    void configure_after_loaded();
+
 private:
     bool m_async = false;
     bool m_is_cpu_inplace_mode = false;
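To make the decision tree above easier to follow, here is a distilled, self-contained restatement of the heuristic. This is a sketch only: it takes the detected model and CPU properties as plain arguments instead of walking the graph or querying cpuinfo, and the enum and function names are illustrative, not part of the patch.

    #include <cstdio>

    // Illustrative enumeration of the formats the heuristic can pick.
    enum class AutoFormat { None, NCHW88, NCHW44, NCHW44Dot };

    // Simplified restatement of layout_transform_optimization(): the real code
    // derives these flags from the loaded graph and from cpuinfo at load time.
    AutoFormat choose_format(
            bool is_model_nchw, bool is_model_float32, bool is_model_int8,
            float dimshuffle_cnt, float conv_cnt, bool has_x86_avx,
            bool has_x86_sse2_only, bool has_arm_neon, bool has_arm_dot) {
        // Only NCHW models qualify, and shufflenet-like models (many dimshuffles
        // per convolution) are skipped because nchw88/nchw44 would hurt them.
        float ratio = conv_cnt > 0 ? dimshuffle_cnt / conv_cnt : 0.f;
        if (!is_model_nchw || ratio > 0.15f)
            return AutoFormat::None;

        if (is_model_float32) {
            if (has_x86_avx)
                return AutoFormat::NCHW88;     // x86 with AVX
            if (has_x86_sse2_only || has_arm_neon)
                return AutoFormat::NCHW44;     // SSE2-only x86 or Arm NEON
        } else if (is_model_int8) {
            if (has_arm_neon && has_arm_dot)
                return AutoFormat::NCHW44Dot;  // Arm with dot-product support
            if (has_arm_neon)
                return AutoFormat::NCHW44;     // Arm without dot product
        }
        return AutoFormat::None;
    }

    int main() {
        // Example: a float32 NCHW model on an AVX-capable x86 CPU picks NCHW88.
        AutoFormat fmt = choose_format(
                true, true, false, /*dimshuffle*/ 2, /*conv*/ 50, true, false, false,
                false);
        std::printf("chosen format: %d\n", static_cast<int>(fmt));
        return 0;
    }
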
diff --git a/lite/test/test_network_options.cpp b/lite/test/test_network_options.cpp
index 408b5500d4894a4791aead9a73dfac66e9edde33..fd852cdb12024a07ccf3a4da8dfb2ba51ec5c39d 100644
--- a/lite/test/test_network_options.cpp
+++ b/lite/test/test_network_options.cpp
@@ -48,6 +48,35 @@ TEST(TestNetWorkOptions, no_var_sanity_check_and_record) {
     compare_lite_tensor<float>(output_tensor, result_mgb);
 }
 
+TEST(TestNetWorkOptions, auto_optimize_inference_layout) {
+    Config config;
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
+
+    config.auto_optimize_inference = true;
+
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+    network->load_model(model_path);
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+    auto result_tensor = std::make_shared<Tensor>(
+            LiteDeviceType::LITE_CPU, Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
+
+    void* out_data = result_tensor->get_memory_ptr();
+    output_tensor->reset(out_data, result_tensor->get_layout());
+
+    network->forward();
+    network->wait();
+
+    compare_lite_tensor<float>(output_tensor, result_mgb);
+}
+
 TEST(TestNetWorkOptions, const_shape) {
     Config config;
     auto tensor = get_input_data("./input_data.npy");
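Beyond the regression test above, an application-level sanity check could run the same model with the flag off and on and compare the outputs. Below is a rough sketch using only the public lite C++ API; the model path, input name, input shape, and output size are placeholders for a real model:

    #include "lite/network.h"
    #include "lite/tensor.h"

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <memory>
    #include <vector>

    using namespace lite;

    // Run one inference with the given config and return a copy of output 0.
    // "./shufflenet.mge", "data", 1x3x224x224 and the 1000-class output are
    // placeholders for a real model.
    static std::vector<float> run_once(const Config& config) {
        auto network = std::make_shared<Network>(config);
        network->load_model("./shufflenet.mge");

        std::vector<float> input(1 * 3 * 224 * 224, 0.5f);
        auto input_tensor = network->get_io_tensor("data");
        input_tensor->reset(
                input.data(), Layout{{1, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT});

        network->forward();
        network->wait();

        auto output_tensor = network->get_output_tensor(0);
        auto* ptr = static_cast<float*>(output_tensor->get_memory_ptr());
        return std::vector<float>(ptr, ptr + 1000);
    }

    int main() {
        Config base, optimized;
        optimized.auto_optimize_inference = true;

        auto ref = run_once(base);
        auto opt = run_once(optimized);

        // The chosen layout may change, but the results should agree within a
        // small numerical tolerance.
        float max_diff = 0.f;
        for (size_t i = 0; i < ref.size(); ++i)
            max_diff = std::max(max_diff, std::fabs(ref[i] - opt[i]));
        std::printf("max abs diff: %f\n", max_diff);
        return 0;
    }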