From e70c07a223e3f0bca3f62e96b5dafc09a45a62f6 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Fri, 31 Dec 2021 16:14:09 +0800
Subject: [PATCH] feat(lite): add global layout transform c/c++ interface for
 lite

GitOrigin-RevId: 36a4b26b42079611f38c9f817165a26269749ec3
---
 lite/include/lite/network.h            | 10 ++++-
 lite/lite-c/include/lite-c/network_c.h | 16 ++++++++
 lite/lite-c/src/network.cpp            | 17 ++++++++
 lite/src/mge/function_dft.h            |  4 ++
 lite/src/mge/network_impl.cpp          | 56 ++++++++++++++++++++++----
 lite/src/mge/network_impl.h            | 19 +++++++--
 lite/src/network.cpp                   | 29 +++++++++++++
 lite/test/test_network.cpp             | 24 +++++++++++
 lite/test/test_network_c.cpp           | 15 +++++++
 9 files changed, 178 insertions(+), 12 deletions(-)

diff --git a/lite/include/lite/network.h b/lite/include/lite/network.h
index 3fa6fbefc..9be0cecc8 100644
--- a/lite/include/lite/network.h
+++ b/lite/include/lite/network.h
@@ -97,7 +97,7 @@ struct LITE_API Options {
     bool no_profiling_on_shape_change = false;
     uint8_t jit_level = 0;
     uint8_t comp_node_seq_record_level = 0;
-    uint8_t graph_opt_level = 2;
+    uint8_t graph_opt_level = 0;
     uint16_t async_exec_level = 1;
 
     //! layout transform options
@@ -366,6 +366,14 @@ public:
     static void shared_weight_with_network(
             std::shared_ptr<Network> dst_network,
             const std::shared_ptr<Network> src_network);
+
+    //! enable the global layout transform optimization for the network
+
+    static void enable_global_layout_transform(std::shared_ptr<Network> network);
+
+    //! dump the network after the global layout transform optimization
+    static void dump_layout_transform_model(
+            std::shared_ptr<Network> network, std::string optimized_model_path);
 };
 
 }  // namespace lite

diff --git a/lite/lite-c/include/lite-c/network_c.h b/lite/lite-c/include/lite-c/network_c.h
index df01fdb0c..592eba137 100644
--- a/lite/lite-c/include/lite-c/network_c.h
+++ b/lite/lite-c/include/lite-c/network_c.h
@@ -572,6 +572,22 @@ LITE_API int LITE_enable_io_bin_dump(LiteNetwork network, const char* io_bin_out
 LITE_API int LITE_get_static_memory_alloc_info(
         LiteNetwork network, const char* log_dir);
 
+/**
+ * \brief enable the global layout transform optimization
+ * \return int if the return value is not zero, an error happened; the error
+ * message can be retrieved by LITE_get_last_error
+ */
+LITE_API int LITE_enable_global_layout_transform(LiteNetwork network);
+
+/**
+ * \brief dump the model after the global layout transform optimization
+ * \param[in] dump_file_path the file path to dump the optimized model to
+ * \return int if the return value is not zero, an error happened; the error
+ * message can be retrieved by LITE_get_last_error
+ */
+LITE_API int LITE_dump_layout_transform_model(
+        LiteNetwork network, const char* dump_file_path);
+
 #ifdef __cplusplus
 }
 #endif
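For context, a minimal usage sketch of the two new C entry points. This is
an illustration, not part of the patch: it assumes the pre-existing helpers
LITE_make_network / LITE_load_model_from_path and the default_config() /
default_network_io() accessors from network_c.h; error handling through
LITE_get_last_error is elided.

    LiteNetwork network;
    LITE_make_network(&network, *default_config(), *default_network_io());

    // must be called before the model is loaded
    LITE_enable_global_layout_transform(network);
    LITE_load_model_from_path(network, "./shufflenet.mge");

    // must be called after the model is loaded
    LITE_dump_layout_transform_model(network, "./shufflenet_after_trans.mge");

    LITE_forward(network);
    LITE_wait(network);
    LITE_destroy_network(network);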
diff --git a/lite/lite-c/src/network.cpp b/lite/lite-c/src/network.cpp
index 49fb94248..6814c9cea 100644
--- a/lite/lite-c/src/network.cpp
+++ b/lite/lite-c/src/network.cpp
@@ -648,4 +648,21 @@ int LITE_get_static_memory_alloc_info(LiteNetwork network, const char* log_dir)
     LITE_CAPI_END();
 }
 
+int LITE_enable_global_layout_transform(LiteNetwork network) {
+    LITE_CAPI_BEGIN();
+    LITE_ASSERT(network, "The network passed to LITE api is null");
+    std::shared_ptr<lite::Network> network_shared{
+            static_cast<lite::Network*>(network), [](void*) {}};
+    lite::Runtime::enable_global_layout_transform(network_shared);
+    LITE_CAPI_END();
+}
+
+int LITE_dump_layout_transform_model(LiteNetwork network, const char* dump_file_path) {
+    LITE_CAPI_BEGIN();
+    LITE_ASSERT(network, "The network passed to LITE api is null");
+    std::shared_ptr<lite::Network> network_shared{
+            static_cast<lite::Network*>(network), [](void*) {}};
+    lite::Runtime::dump_layout_transform_model(network_shared, dump_file_path);
+    LITE_CAPI_END();
+}
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

diff --git a/lite/src/mge/function_dft.h b/lite/src/mge/function_dft.h
index c776ee272..a4d107214 100644
--- a/lite/src/mge/function_dft.h
+++ b/lite/src/mge/function_dft.h
@@ -121,6 +121,8 @@ inline void call_func(
         CALL_FUNC(use_tensorrt);
     } else if (func_name == "set_cpu_inplace_mode") {
         CALL_FUNC(set_cpu_inplace_mode);
+    } else if (func_name == "enable_global_layout_transform") {
+        CALL_FUNC(enable_global_layout_transform);
     } else {
         THROW_FUNC_ERROR(func_name);
     }
@@ -186,6 +188,8 @@ inline void call_func(
         return CALL_FUNC(enable_io_txt_dump, file_name);
     } else if (func_name == "enable_io_bin_dump") {
         return CALL_FUNC(enable_io_bin_dump, file_name);
+    } else if (func_name == "dump_layout_transform_model") {
+        return CALL_FUNC(dump_layout_transform_model, file_name);
     }
     THROW_FUNC_ERROR(func_name);
 }
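The function_dft.h hunks register the new methods with lite's name-based
dispatch: the public Runtime wrappers pass a function-name string to the
default (MegEngine) backend, and call_func resolves the string to the
matching NetworkImplDft member through the CALL_FUNC macro. A simplified,
self-contained sketch of the pattern, for illustration only (the real code
is templated over the implementation type and the argument list):

    #include <stdexcept>
    #include <string>

    struct BackendImpl {
        void enable_global_layout_transform() { /* ... */ }
        void dump_layout_transform_model(std::string path) { /* ... */ }
    };

    // entry points without extra arguments are resolved by name ...
    inline void call_func(std::string func_name, BackendImpl* impl) {
        if (func_name == "enable_global_layout_transform")
            return impl->enable_global_layout_transform();
        throw std::runtime_error("no such function: " + func_name);
    }

    // ... and so are entry points taking a file-name argument
    inline void call_func(std::string func_name, BackendImpl* impl, std::string file) {
        if (func_name == "dump_layout_transform_model")
            return impl->dump_layout_transform_model(file);
        throw std::runtime_error("no such function: " + func_name);
    }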
diff --git a/lite/src/mge/network_impl.cpp b/lite/src/mge/network_impl.cpp
index 1d35e07cb..74c30abd3 100644
--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -22,7 +22,6 @@
 #include "megbrain/common.h"
 #include "megbrain/comp_node.h"
 #include "megbrain/comp_node_env.h"
-#include "megbrain/gopt/inference.h"
 #include "megbrain/graph.h"
 #include "megbrain/graph/cg.h"
 #include "megbrain/opr/io.h"
@@ -364,19 +363,26 @@ void NetworkImplDft::adapt_option_valid() {
     }
 }
 
+void NetworkImplDft::global_layout_transform() {
+    if (m_set_layout_transform) {
+        m_load_result.output_var_list = mgb::gopt::layout_transform(
+                m_load_result.output_var_list, m_layout_transform_target);
+    }
+}
+
 void NetworkImplDft::load_model(
         std::shared_ptr<void> model_mem, size_t size,
         std::unordered_map<std::string, LiteAny> separate_config_map) {
     if (!m_loader) {
         m_input_file =
                 mgb::serialization::InputFile::make_mem_proxy(model_mem, size, false);
-        auto format = mgb::serialization::GraphLoader::identify_graph_dump_format(
+        m_format = mgb::serialization::GraphLoader::identify_graph_dump_format(
                 *m_input_file);
-        if (!format.valid()) {
+        if (!m_format.valid()) {
             LITE_THROW("invalid model format");
         }
         m_loader = mgb::serialization::GraphLoader::make(
-                std::move(m_input_file), format.val());
+                std::move(m_input_file), m_format.val());
     }
 
     //! apply the user configuration to the mge model
@@ -400,7 +406,9 @@ void NetworkImplDft::load_model(
         use_tensorrt();
     }
 
-    m_load_result = m_loader->load(m_load_config, true);
+    m_load_result = m_loader->load(m_load_config, false);
+
+    global_layout_transform();
 
     adapt_option_valid();
 
@@ -847,9 +855,6 @@ const char* NetworkImplDft::get_input_name(size_t index) const {
 //! Plugin part
 void NetworkImplDft::enable_profile_performance(std::string profile_json_file) {
 #if MGB_ENABLE_JSON
-#if MGB_OPENCL
-    mgb::CompNode::enable_opencl_profile(true);
-#endif
     m_profiler = std::make_unique<mgb::GraphProfiler>(m_load_config.comp_graph.get());
     m_profiler_output_file = profile_json_file;
 #else
@@ -889,5 +894,40 @@ void NetworkImplDft::get_static_memory_alloc_info(const std::string& log_dir) co
     LITE_MARK_USED_VAR(log_dir);
 }
 
+void NetworkImplDft::enable_global_layout_transform() {
+    m_layout_transform_target = mgb::gopt::GraphTuningOptions::Target::UNSPEC;
+
+    switch (m_user_config->device_type) {
+        case LiteDeviceType::LITE_CPU:
+            m_layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CPU;
+            break;
+        case LiteDeviceType::LITE_CUDA:
+            m_layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CUDA;
+            break;
+        default:
+            m_layout_transform_target = mgb::gopt::GraphTuningOptions::Target::UNSPEC;
+            LITE_WARN(
+                    "lite compnode type (enum value: %d) has no dedicated layout "
+                    "transform target, falling back to UNSPEC",
+                    (int)(m_user_config->device_type));
+    }
+    m_set_layout_transform = true;
+}
+
+void NetworkImplDft::dump_layout_transform_model(std::string optimized_model_path) {
+    if (m_set_layout_transform) {
+        auto out_file = mgb::serialization::OutputFile::make_fs(
+                optimized_model_path.c_str(), 'w');
+        using DumpConfig = mgb::serialization::GraphDumper::DumpConfig;
+        DumpConfig config{1, false, false};
+        auto dumper = mgb::serialization::GraphDumper::make(
+                std::move(out_file), m_format.val());
+        dumper->dump(m_load_result.output_var_list, config);
+    } else {
+        LITE_THROW(
+                ssprintf("enable_global_layout_transform should be called "
+                         "before dump_layout_transform_model"));
+    }
+}
 #endif
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

diff --git a/lite/src/mge/network_impl.h b/lite/src/mge/network_impl.h
index ec9e61c23..56e7a0755 100644
--- a/lite/src/mge/network_impl.h
+++ b/lite/src/mge/network_impl.h
@@ -19,6 +19,9 @@
 #include "network_impl_base.h"
 #include "tensor_impl.h"
 
+#include <memory>
+#include <unordered_map>
+#include "megbrain/gopt/inference.h"
 #include "megbrain/graph/bases.h"
 #include "megbrain/plugin/opr_io_dump.h"
 #include "megbrain/plugin/profiler.h"
@@ -28,9 +31,6 @@
 #include "megbrain/serialization/serializer.h"
 #include "megbrain/utils/thin/hash_table.h"
 
-#include <memory>
-#include <unordered_map>
-
 namespace lite {
 
 /*!
@@ -170,11 +170,20 @@ public:
     void get_static_memory_alloc_info(
             const std::string& log_dir = "logs/test") const override;
 
+    //! enable the global layout transform optimization for the network
+    void enable_global_layout_transform();
+
+    //! dump the network after the global layout transform optimization
+    void dump_layout_transform_model(std::string optimized_model_path);
+
 private:
     //! construct the outputspec according to the m_network_io, and set the
     //! call_back to the outputspec
     void make_output_spec();
 
+    //! do the global layout transform for the given platform target
+    void global_layout_transform();
+
     //! modify the execution policy
     void modify_exection_policy();
 
@@ -223,6 +232,7 @@ private:
     int m_nr_device_type = 0;
     size_t m_nr_threads = 1;
     bool m_compute_configured_output_only = false;
+    bool m_set_layout_transform = false;
     mgb::CompNode::Locator m_compnode_locator;
 
     AsyncCallback m_async_callback = nullptr;
@@ -233,6 +243,9 @@ private:
     //! The model load related data
    S m_execution_policy = static_cast<S>(0);
     std::unique_ptr<mgb::serialization::InputFile> m_input_file;
+    mgb::Maybe<mgb::serialization::GraphDumpFormat> m_format;
+    mgb::gopt::GraphTuningOptions::Target m_layout_transform_target;
+
     mgb::serialization::GraphLoadConfig m_load_config;
     mgb::serialization::GraphLoader::LoadResult m_load_result;
     mgb::ComputingGraph::OutputSpec m_output_spec;
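Note that dump_layout_transform_model() writes the optimized graph with the
same serialization format detected at load time (m_format), so the dumped
file is an ordinary MegEngine model. A hedged sketch of what that buys you
(illustration only; input setup and the file name from the tests below are
assumed): the optimized model can later be loaded directly, skipping the
layout-transform search.

    #include "lite/network.h"

    // Load the already-optimized model as a plain model; no call to
    // Runtime::enable_global_layout_transform is needed this time.
    std::shared_ptr<lite::Network> deployed = std::make_shared<lite::Network>();
    deployed->load_model("./shufflenet_after_trans.mge");
    // ... set input tensors as usual, then run:
    deployed->forward();
    deployed->wait();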
diff --git a/lite/src/network.cpp b/lite/src/network.cpp
index abe1db303..a4ffb4b58 100644
--- a/lite/src/network.cpp
+++ b/lite/src/network.cpp
@@ -505,4 +505,33 @@ void Runtime::shared_weight_with_network(
     LITE_ERROR_HANDLER_END
 }
 
+void Runtime::enable_global_layout_transform(std::shared_ptr<Network> network) {
+    LITE_ERROR_HANDLER_BEGIN
+    auto network_impl = NetworkHelper::implement(network);
+    if (network_impl->get_backend_type() == LiteBackend::LITE_DEFAULT) {
+        LITE_ASSERT(
+                !NetworkHelper::loaded(network),
+                "enable_global_layout_transform should be used before the model is loaded.");
+        call_func<NetworkImplDft, void>("enable_global_layout_transform", network_impl);
+        return;
+    }
+    LITE_THROW("enable_global_layout_transform is not available in the backend.");
+    LITE_ERROR_HANDLER_END
+}
+
+void Runtime::dump_layout_transform_model(
+        std::shared_ptr<Network> network, std::string optimized_model_path) {
+    LITE_ERROR_HANDLER_BEGIN
+    auto network_impl = NetworkHelper::implement(network);
+    if (network_impl->get_backend_type() == LiteBackend::LITE_DEFAULT) {
+        LITE_ASSERT(
+                NetworkHelper::loaded(network),
+                "dump_layout_transform_model should be used after the model is loaded.");
+        call_func<NetworkImplDft, void>(
+                "dump_layout_transform_model", network_impl, optimized_model_path);
+        return;
+    }
+    LITE_THROW("dump_layout_transform_model is not available in the backend.");
+    LITE_ERROR_HANDLER_END
+}
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
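The two Runtime wrappers above encode an ordering contract: the transform
must be requested before load_model() so the optimization can run during
loading, and the optimized graph can only be dumped afterwards. A hedged
usage sketch of the C++ API (illustration only; it mirrors the tests that
follow and uses their fixture paths):

    // enable must precede load_model; dump must follow it
    std::shared_ptr<lite::Network> network = std::make_shared<lite::Network>();
    lite::Runtime::enable_global_layout_transform(network);
    network->load_model("./shufflenet.mge");
    lite::Runtime::dump_layout_transform_model(
            network, "./shufflenet_after_trans.mge");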
diff --git a/lite/test/test_network.cpp b/lite/test/test_network.cpp
index 2b2564d60..5b9334d88 100644
--- a/lite/test/test_network.cpp
+++ b/lite/test/test_network.cpp
@@ -909,6 +909,30 @@ TEST(TestNetWork, LoadPackedModel) {
     network->wait();
 }
 
+TEST(TestNetWork, GlobalLayoutTransform) {
+    // set_log_level(LiteLogLevel::DEBUG);
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    std::string dump_model_name = "./shufflenet_after_trans.mge";
+
+    NetworkIO IO;
+    Config config;
+    std::shared_ptr<Network> network = std::make_shared<Network>(config, IO);
+    Runtime::enable_global_layout_transform(network);
+    network->load_model(model_path);
+
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+
+    Runtime::dump_layout_transform_model(network, dump_model_name);
+    network->forward();
+    network->wait();
+    ASSERT_TRUE(fopen(dump_model_name.c_str(), "r"));
+}
+
 TEST(TestNetWork, GetDeviceType) {
     auto tensor = get_input_data("./input_data.npy");
     std::string model_path = "./shufflenet.mge";

diff --git a/lite/test/test_network_c.cpp b/lite/test/test_network_c.cpp
index 0c16857b9..591e5db60 100644
--- a/lite/test/test_network_c.cpp
+++ b/lite/test/test_network_c.cpp
@@ -889,6 +889,21 @@ TEST(TestCapiNetWork, ProfileIOdump) {
     LITE_CAPI_CHECK(LITE_destroy_network(c_network));
 }
 
+TEST(TestCapiNetWork, GlobalLayoutTransform) {
+    ForwardMgb;
+    MakeNetwork;
+    LITE_CAPI_CHECK(LITE_enable_global_layout_transform(c_network));
+    LoadNetwork;
+    LITE_CAPI_CHECK(LITE_dump_layout_transform_model(
+            c_network, "./shufflenet_after_trans.mge"));
+    SetInput;
+    ForwardNetwork;
+    ASSERT_TRUE(fopen("./shufflenet_after_trans.mge", "r"));
+    GetOutput;
+    CompareResult;
+    LITE_CAPI_CHECK(LITE_destroy_network(c_network));
+}
+
 TEST(TestCapiNetWork, GetDeviceType) {
     lite::Config config;
     auto lite_tensor = lite::get_input_data("./input_data.npy");
-- 
GitLab