diff --git a/dnn/src/cuda/conv_bias/cudnn_conv.cpp b/dnn/src/cuda/conv_bias/cudnn_conv.cpp
index e4025e23f9bb1f6666b71db30873f1c143f59ca7..2d11b7f9e82980f93e2ed3a7d4c9d2d67c7619cc 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv.cpp
@@ -138,7 +138,7 @@ void ConvBiasForwardImpl::AlgoCUDNNConv::exec(const ExecArgs& args) const {
     if (args.z_layout->ndim > 0) {
         auto z_tensor = *args.z_tensor;
         if (args.z_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
-            z_tensor.raw_ptr = bundle.get(2);
+            z_tensor = TensorND{bundle.get(2), args.z_tensor->layout};
             z_tensor.layout.dtype = DType();
             args.opr->check_or_deduce_dtype_fwd(
                     args.src_layout->dtype, args.filter_layout->dtype,
diff --git a/lite/load_and_run/src/helpers/common.h b/lite/load_and_run/src/helpers/common.h
index 6fc04bc48baf5e89fa9161bb795fb791c4dbb480..90499393dcf07a35eade7f8843b995a58c08c28f 100644
--- a/lite/load_and_run/src/helpers/common.h
+++ b/lite/load_and_run/src/helpers/common.h
@@ -36,6 +36,8 @@ enum class RunStage {
     AFTER_RUNNING_ITER = 6,
 
     AFTER_MODEL_RUNNING = 7,
+
+    GLOBAL_OPTIMIZATION = 8,
 };
 /*!
  * \brief: type of different model
diff --git a/lite/load_and_run/src/models/model_mdl.cpp b/lite/load_and_run/src/models/model_mdl.cpp
index 63fa6d732fe72fbd5fc3821ca3f85402611072a7..a328448b68bafeff395fc9b35307f2fa1c020e89 100644
--- a/lite/load_and_run/src/models/model_mdl.cpp
+++ b/lite/load_and_run/src/models/model_mdl.cpp
@@ -52,15 +52,15 @@ void ModelMdl::load_model() {
         m_model_file->read(&testcase_num, sizeof(testcase_num));
     }
 
-    auto format =
+    m_format =
             mgb::serialization::GraphLoader::identify_graph_dump_format(*m_model_file);
     mgb_assert(
-            format.valid(),
+            m_format.valid(),
             "invalid format, please make sure model is dumped by GraphDumper");
 
     //! load computing graph of model
     m_loader = mgb::serialization::GraphLoader::make(
-            std::move(m_model_file), format.val());
+            std::move(m_model_file), m_format.val());
     m_load_result = m_loader->load(m_load_config, false);
     m_load_config.comp_graph.reset();
 
@@ -87,9 +87,15 @@ void ModelMdl::make_output_spec() {
     m_asyc_exec = m_load_result.graph_compile(m_output_spec);
 }
 
-std::shared_ptr<mgb::serialization::GraphLoader>& ModelMdl::reset_loader() {
-    m_loader = mgb::serialization::GraphLoader::make(
-            m_loader->reset_file(), m_loader->format());
+std::shared_ptr<mgb::serialization::GraphLoader>& ModelMdl::reset_loader(
+        std::unique_ptr<mgb::serialization::InputFile> input_file) {
+    if (input_file) {
+        m_loader = mgb::serialization::GraphLoader::make(
+                std::move(input_file), m_loader->format());
+    } else {
+        m_loader = mgb::serialization::GraphLoader::make(
+                m_loader->reset_file(), m_loader->format());
+    }
     return m_loader;
 }
 
diff --git a/lite/load_and_run/src/models/model_mdl.h b/lite/load_and_run/src/models/model_mdl.h
index 2d7923b4d83fd6c3db63e526aee287db20d1e4ea..59d27bd910cb0908250d29177a284ac300f2f8c6 100644
--- a/lite/load_and_run/src/models/model_mdl.h
+++ b/lite/load_and_run/src/models/model_mdl.h
@@ -50,8 +50,16 @@ public:
     //! get load config for megDL model
     mgb::serialization::GraphLoadConfig& get_mdl_config() { return m_load_config; }
 
-    //! reset the graph loader for dump_with_testcase model
-    std::shared_ptr<mgb::serialization::GraphLoader>& reset_loader();
+    /*! reset the underlying graph loader from which further load() would read()
+     *
+     * \param input_file new input_file, can be null
+     * \return new loader
+     */
+    std::shared_ptr<mgb::serialization::GraphLoader>& reset_loader(
+            std::unique_ptr<mgb::serialization::InputFile> input_file = {});
+
+    //! get the underlying graph loader
+    std::shared_ptr<mgb::serialization::GraphLoader>& get_loader() { return m_loader; }
 
     //! algo strategy for runing model
     void set_mdl_strategy(Strategy& u_strategy) { m_strategy = u_strategy; }
@@ -88,11 +96,19 @@ public:
                 m_load_config.comp_graph.get(), range);
     }
 
+    //! make a graph dumper writing to out_file in the format stored by load_model()
+    std::unique_ptr<mgb::serialization::GraphDumper> get_dumper(
+            std::unique_ptr<mgb::serialization::OutputFile> out_file) {
+        return mgb::serialization::GraphDumper::make(
+                std::move(out_file), m_format.val());
+    }
+
 private:
     bool share_model_mem;
     std::string model_path;
     std::unique_ptr<mgb::serialization::InputFile> m_model_file;
     mgb::serialization::GraphLoadConfig m_load_config;
+    mgb::Maybe<mgb::serialization::GraphDumpFormat> m_format;
 
     mgb::serialization::GraphLoader::LoadResult m_load_result;
     std::shared_ptr<mgb::serialization::GraphLoader> m_loader;
diff --git a/lite/load_and_run/src/options/layout_trans_options.cpp b/lite/load_and_run/src/options/layout_trans_options.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e08c344c1fd646683ae407a2191d5c5376f84dcb
--- /dev/null
+++ b/lite/load_and_run/src/options/layout_trans_options.cpp
@@ -0,0 +1,166 @@
+/**
+ * \file lite/load_and_run/src/options/layout_trans_options.cpp
+ *
+ * This file is part of MegEngine, a deep learning framework developed by
+ * Megvii.
+ *
+ * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
+ */
+#include "layout_trans_options.h"
+#include <gflags/gflags.h>
+#include "megbrain/serialization/serializer.h"
+#include "misc.h"
+#include "models/model_lite.h"
+#include "models/model_mdl.h"
+namespace lar {
+
+//! lite models: throw at the BEFORE_MODEL_LOAD stage, the option is unsupported
+template <>
+void GoptLayoutOption::config_model_internel<ModelLite>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelLite> /* model */) {
+    if (runtime_param.stage == RunStage::BEFORE_MODEL_LOAD) {
+        LITE_THROW("lite model don't support global graph optimization");
+    }
+}
+
+/*!
+ * MegDL models: at the GLOBAL_OPTIMIZATION stage, run
+ * mgb::gopt::layout_transform() on the output vars of the load result; when a
+ * dump path is set, also dump the transformed graph and, if the model carries
+ * testcases, each testcase graph to that path.
+ */
+template <>
+void GoptLayoutOption::config_model_internel<ModelMdl>(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelMdl> model) {
+    if (runtime_param.stage == RunStage::GLOBAL_OPTIMIZATION) {
+        if (layout_transform) {
+            auto&& load_result = model->get_mdl_load_result();
+            load_result.output_var_list = mgb::gopt::layout_transform(
+                    load_result.output_var_list, layout_transform_target);
+
+            if (!layout_transform_dump_file.empty()) {
+                auto out_file = mgb::serialization::OutputFile::make_fs(
+                        layout_transform_dump_file.c_str(), 'w');
+                auto testcase_num = model->get_testcase_num();
+
+                if (testcase_num) {
+                    //! keep the magic as an array: sizeof() on a `const char*`
+                    //! yields the pointer size, not the length of the tag
+                    constexpr char magic[] = "mgbtest0";
+                    constexpr size_t len = sizeof(magic) - 1;
+                    out_file->write(magic, len);
+                    out_file->write(&testcase_num, sizeof(testcase_num));
+                }
+
+                using DumpConfig = mgb::serialization::GraphDumper::DumpConfig;
+                DumpConfig config{1, false, false};
+                auto dumper = model->get_dumper(std::move(out_file));
+                dumper->dump(load_result.output_var_list, config);
+
+                if (testcase_num) {
+                    //! remember the current read offset, it is restored below
+                    auto input_file = model->get_loader()->reset_file();
+                    auto current_offset = input_file->tell();
+                    auto loader = model->reset_loader(std::move(input_file));
+                    auto testcase = loader->load(model->get_mdl_config(), false);
+                    for (size_t i = 0; i < testcase_num; ++i) {
+                        auto casefile = mgb::serialization::OutputFile::make_fs(
+                                layout_transform_dump_file.c_str(), 'a');
+                        auto casedumper = model->get_dumper(std::move(casefile));
+                        casedumper->dump(testcase.output_var_list, config);
+                        if (i != testcase_num - 1) {
+                            loader = model->reset_loader();
+                            testcase = loader->load(model->get_mdl_config(), false);
+                        }
+                    }
+                    //! rewind and skip back to the offset recorded above
+                    input_file = model->get_loader()->reset_file();
+                    input_file->rewind();
+                    input_file->skip(current_offset);
+                    model->reset_loader(std::move(input_file));
+                }
+            }
+        }
+    }
+}
+
+}  // namespace lar
+
+using namespace lar;
+
+//! derive the option state from --layout_transform / --layout_transform_dump
+GoptLayoutOption::GoptLayoutOption() {
+    m_option_name = "gopt_layout";
+    if (FLAGS_layout_transform != "cuda" && FLAGS_layout_transform != "cpu" &&
+        FLAGS_layout_transform != "opencl") {
+        layout_transform = false;
+        layout_transform_target = mgb::gopt::GraphTuningOptions::Target::UNSPEC;
+
+    } else {
+        layout_transform = true;
+        if (FLAGS_layout_transform == "cuda") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CUDA;
+        } else if (FLAGS_layout_transform == "cpu") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::CPU;
+        } else if (FLAGS_layout_transform == "opencl") {
+            layout_transform_target = mgb::gopt::GraphTuningOptions::Target::OPENCL;
+        }
+    }
+    layout_transform_dump_file = FLAGS_layout_transform_dump;
+}
+
+/*!
+ * The option is enabled when a supported --layout_transform target is given or
+ * when --layout_transform_dump is set; an unsupported target asserts.
+ */
+bool GoptLayoutOption::is_valid() {
+    bool ret = false;
+    if (!FLAGS_layout_transform.empty()) {
+        if (FLAGS_layout_transform != "cuda" && FLAGS_layout_transform != "cpu" &&
+            FLAGS_layout_transform != "opencl") {
+            mgb_assert(
+                    false,
+                    "unsupported target(got:%s) for global layout "
+                    "transform",
+                    FLAGS_layout_transform.c_str());
+            ret = false;
+        } else {
+            ret = true;
+        }
+    }
+    //! a non-empty dump path also enables the option; testing for an empty path
+    //! here would enable the option whenever no dump path is given
+    ret = ret || !FLAGS_layout_transform_dump.empty();
+    return ret;
+}
+
+//! return the function-local static option, or nullptr when is_valid() is false
+std::shared_ptr<OptionBase> GoptLayoutOption::create_option() {
+    static std::shared_ptr<GoptLayoutOption> option(new GoptLayoutOption);
+    if (GoptLayoutOption::is_valid()) {
+        return std::static_pointer_cast<OptionBase>(option);
+    } else {
+        return nullptr;
+    }
+}
+
+void GoptLayoutOption::config_model(
+        RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) {
+    CONFIG_MODEL_FUN;
+}
+
+DEFINE_string(
+        layout_transform, "",
+        "Enable global layout transform optimization for computing graph. User should "
+        "specify the device target for the optimization, and a series of passes will "
+        "be applied on the computing graph. The passes will benchmark the elapsed time "
+        "of operators on different tensor layouts, and select fastest implementation "
+        "for the operators. The optimization process will take some time. The default "
+        "target is unspec, which all the available for operators will be profiled. So "
+        "the optimize time will be longer.");
+DEFINE_string(
+        layout_transform_dump, "",
+        "The computing graph after global layout transform will be dumped to the given "
+        "file path.");
+
+REGIST_OPTION_CREATOR(gopt_layout, lar::GoptLayoutOption::create_option);
diff --git a/lite/load_and_run/src/options/layout_trans_options.h b/lite/load_and_run/src/options/layout_trans_options.h
new file mode 100644
index 0000000000000000000000000000000000000000..de94084df521357f0eae94fb8718441abec980d7
--- /dev/null
+++ b/lite/load_and_run/src/options/layout_trans_options.h
@@ -0,0 +1,47 @@
+/**
+ * \file lite/load_and_run/src/options/layout_trans_options.h
+ *
+ * This file is part of MegEngine, a deep learning framework developed by
+ * Megvii.
+ *
+ * \copyright Copyright (c) 2020-2021 Megvii Inc. All rights reserved.
+ */
+
+#pragma once
+
+#include <gflags/gflags.h>
+#include "megbrain/gopt/inference.h"
+#include "models/model.h"
+#include "option_base.h"
+DECLARE_string(layout_transform);
+DECLARE_string(layout_transform_dump);
+
+namespace lar {
+//! option for global layout transform, driven by --layout_transform and
+//! --layout_transform_dump
+class GoptLayoutOption final : public OptionBase {
+public:
+    //! get condition for constructing GoptLayoutOption
+    static bool is_valid();
+
+    //! create option using condition from cmdline args
+    static std::shared_ptr<OptionBase> create_option();
+
+    //! configure model for different runtime_param
+    void config_model(
+            RuntimeParam& runtime_param, std::shared_ptr<ModelBase> model) override;
+
+    //! get option name for quick search
+    std::string option_name() const override { return m_option_name; }
+
+private:
+    GoptLayoutOption();
+    //! config template for different model
+    template <typename ModelImpl>
+    void config_model_internel(RuntimeParam&, std::shared_ptr<ModelImpl>) {}
+    bool layout_transform;
+    std::string m_option_name;
+    std::string layout_transform_dump_file;
+    mgb::gopt::GraphTuningOptions::Target layout_transform_target;
+};
+}  // namespace lar
diff --git a/lite/load_and_run/src/options/strategy_options.cpp b/lite/load_and_run/src/options/strategy_options.cpp
index 0e08e885176b6a135c39a75e05cb04e80584007a..254b9994e7c77a2d64a3990d6bb98337de42d7e7 100644
--- a/lite/load_and_run/src/options/strategy_options.cpp
+++ b/lite/load_and_run/src/options/strategy_options.cpp
@@ -93,4 +93,4 @@ DEFINE_bool(share_param_mem, false, "load model from shared memeory");
 
 REGIST_OPTION_CREATOR(run_strategy, lar::StrategyOption::create_option);
 
-REGIST_OPTION_CREATOR(run_testcase, lar::TestcaseOption::create_option);
\ No newline at end of file
+REGIST_OPTION_CREATOR(run_testcase, lar::TestcaseOption::create_option);
diff --git a/lite/load_and_run/src/strategys/strategy_normal.cpp b/lite/load_and_run/src/strategys/strategy_normal.cpp
index 923cae7cae919ccf0abb4dbce9f028d8ae4fed69..591be03d6002f2b82c51e74c7643f62942fafe8f 100644
--- a/lite/load_and_run/src/strategys/strategy_normal.cpp
+++ b/lite/load_and_run/src/strategys/strategy_normal.cpp
@@ -60,6 +60,9 @@ void NormalStrategy::run_subline() {
     m_runtime_param.stage = RunStage::AFTER_MODEL_LOAD;
     stage_config_model();
 
+    m_runtime_param.stage = RunStage::GLOBAL_OPTIMIZATION;
+    stage_config_model();
+
     m_runtime_param.stage = RunStage::BEFORE_OUTSPEC_SET;
     stage_config_model();
 
@@ -164,4 +167,4 @@ void NormalStrategy::run() {
         mgb_assert(false, "--thread must input a positive number!!");
     }
     //! execute before run
-}
\ No newline at end of file
+}