From fa08a5147dd64e8e1fd7b5988609288c290272e9 Mon Sep 17 00:00:00 2001
From: shentanyue <34421038+shentanyue@users.noreply.github.com>
Date: Fri, 19 May 2023 17:23:43 +0800
Subject: [PATCH] [Inference] Save optimized model by pass (#53696)

---
 .../ir/xpu/delete_isolated_node_pass.cc       |   2 +
 paddle/fluid/inference/analysis/argument.h    |   3 +-
 .../inference/analysis/ir_pass_manager.cc     |   2 +-
 .../inference/analysis/passes/CMakeLists.txt  |   5 +
 .../fluid/inference/analysis/passes/passes.cc |   3 +
 .../passes/save_optimized_model_pass.cc       | 144 ++++++++++++++++++
 .../passes/save_optimized_model_pass.h        |  39 +++++
 paddle/fluid/inference/api/analysis_config.cc |   5 +-
 .../fluid/inference/api/analysis_predictor.cc |   3 +-
 .../inference/api/paddle_analysis_config.h    |   9 ++
 .../fluid/inference/api/paddle_pass_builder.h |   1 +
 paddle/fluid/pybind/inference_api.cc          |   3 +
 .../api/analysis_predictor_tester.cc          |  13 ++
 13 files changed, 228 insertions(+), 4 deletions(-)
 create mode 100644 paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
 create mode 100644 paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h

diff --git a/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc b/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
index c543045b5bc..9f12ad5fc51 100644
--- a/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
     Graph* graph,
     std::unordered_set<std::string>* reserved_persistable_node_names) const {
   for (auto* node : graph->Nodes()) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
     if (!node->IsVar() || !node->Var()->Persistable()) continue;
     for (auto* out_node : node->outputs) {
       auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
   std::unordered_set<const Node*> delete_nodes;
   const std::unordered_set<ir::Node*> nodes = graph->Nodes();
   for (auto* node : nodes) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
     if (!node->IsVar() || !node->Var()->Persistable()) continue;
     auto name = node->Var()->Name();
     if (reserved_persistable_node_names.count(name) > 0) continue;
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index f44c877b939..32e98fda98e 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -146,6 +146,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
+  DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
   DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
   DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
 
@@ -294,7 +295,7 @@ struct Argument {
                       XpuQuantPostDynamicWeightBits,
                       int);
   DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
-                      XpuQuantPostDynamicOpTypss,
+                      XpuQuantPostDynamicOpTypes,
                       std::vector<std::string>);
   DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 4051511906b..1ab60085488 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -310,7 +310,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       }
       bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
       pass->Set("use_fc_padding", new bool(use_fc_padding));
-    } else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
+    } else if (pass_name == "fused_multi_transformer_xpu_pass") {
       auto op_types = argument->xpu_quant_post_dynamic_op_types();
       if (std::count(op_types.begin(),
                      op_types.end(),
diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt
index 35c03cfc760..bc41a34db5e 100644
--- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt
@@ -31,12 +31,17 @@ cc_library(
   inference_op_replace_pass
   SRCS inference_op_replace_pass.cc
   DEPS analysis_pass graph_to_program_pass)
+cc_library(
+  save_optimized_model_pass
+  SRCS save_optimized_model_pass.cc
+  DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)
 cc_library(
   analysis_passes
   SRCS passes.cc
   DEPS ir_graph_build_pass
        ir_analysis_pass
+       save_optimized_model_pass
        ir_params_sync_among_devices_pass
        adjust_cudnn_workspace_size_pass
        memory_optim_pass
diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc
index cd65757d08f..26e0f34c0ec 100644
--- a/paddle/fluid/inference/analysis/passes/passes.cc
+++ b/paddle/fluid/inference/analysis/passes/passes.cc
@@ -21,6 +21,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
 
 namespace paddle {
 namespace inference {
@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
                    std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
   passes_.emplace("ir_graph_build_pass",
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
+  passes_.emplace("save_optimized_model_pass",
+                  std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
   passes_.emplace("memory_optimize_pass",
                   std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
   passes_.emplace(
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
new file mode 100644
index 00000000000..a2e96859194
--- /dev/null
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -0,0 +1,144 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
+
+#include <fstream>
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
+  if (!argument->save_optimized_model()) {
+    LOG(WARNING) << "save_optim_cache_model is turned off, skip "
+                    "save_optimized_model_pass";
+    return;
+  }
+  if (!argument->enable_ir_optim()) {
+    LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
+    return;
+  }
+
+  std::string model_opt_cache_dir = argument->optim_cache_dir();
+  if (!model_opt_cache_dir.empty()) {
+    if (!PathExists(model_opt_cache_dir)) {
+      PADDLE_ENFORCE_NE(
+          MKDIR(model_opt_cache_dir.c_str()),
+          -1,
+          platform::errors::PreconditionNotMet(
+              "Can not create optimize cache directory: %s, Make sure you "
+              "have permission to write",
+              model_opt_cache_dir));
+    }
+  } else {
+    model_opt_cache_dir = argument->Has("model_dir")
+                              ? argument->model_dir()
+                              : GetDirRoot(argument->model_program_path());
+  }
+
+  auto& scope = argument->scope();
+  auto* graph = argument->main_graph_ptr();
+
+  framework::ProgramDesc optimized_program_desc;
+  framework::ir::GraphToProgram(*graph, &optimized_program_desc);
+
+  auto IsPersistable = [](const framework::VarDesc* var) {
+    if (var->Persistable() &&
+        var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+        var->GetType() != framework::proto::VarType::FETCH_LIST &&
+        var->GetType() != framework::proto::VarType::RAW) {
+      return true;
+    }
+    return false;
+  };
+
+  auto SerializeParams = [&](const std::string& path) {
+    framework::ProgramDesc save_program;
+    auto* save_block = save_program.MutableBlock(0);
+    std::unordered_set<std::string> save_var_set;
+    for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
+      const auto& global_block = optimized_program_desc.Block(i);
+      for (framework::VarDesc* var : global_block.AllVars()) {
+        if (IsPersistable(var)) {
+          framework::VarDesc* new_var = save_block->Var(var->Name());
+          new_var->SetShape(var->GetShape());
+          new_var->SetDataType(var->GetDataType());
+          new_var->SetType(var->GetType());
+          new_var->SetLoDLevel(var->GetLoDLevel());
+          new_var->SetPersistable(true);
+          save_var_set.insert(new_var->Name());
+        }
+      }
+    }
+
+    std::string save_params_path = path + "/" + "_optimized.pdiparams";
+    std::vector<std::string> save_var_list(save_var_set.begin(),
+                                           save_var_set.end());
+    std::sort(save_var_list.begin(), save_var_list.end());
+    auto* op = save_block->AppendOp();
+    op->SetType("save_combine");
+    op->SetInput("X", save_var_list);
+    op->SetAttr("file_path", save_params_path);
+    op->CheckAttrs();
+
+    framework::Executor exe(platform::CPUPlace{});
+    exe.Run(save_program, &scope, 0, true, true);
+  };
+  // TODO(shentanyue01): Setting hardware and version identification for
+  // optimized models.
+  auto SerializeProg = [&](const std::string& path) {
+    // All persistable var need to be moved to global block
+    auto* global_block = optimized_program_desc.MutableBlock(0);
+    for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
+      const auto& sub_block = optimized_program_desc.Block(i);
+      for (framework::VarDesc* var : sub_block.AllVars()) {
+        if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
+          framework::VarDesc* new_var = global_block->Var(var->Name());
+          new_var->SetShape(var->GetShape());
+          new_var->SetDataType(var->GetDataType());
+          new_var->SetType(var->GetType());
+          new_var->SetLoDLevel(var->GetLoDLevel());
+          new_var->SetPersistable(true);
+        }
+      }
+    }
+    std::string save_model_path = path + "/" + "_optimized.pdmodel";
+    auto str = optimized_program_desc.Proto()->SerializeAsString();
+    std::ofstream file(save_model_path.c_str(), std::ios::binary);
+    file.write(str.c_str(), str.size());
+    file.close();
+  };
+
+  SerializeProg(model_opt_cache_dir);
+  SerializeParams(model_opt_cache_dir);
+  LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
+}
+
+void SaveOptimizedModelPass::RunImpl(Argument* argument) {
+  if (argument->use_xpu_valid()) {
+    SaveOptimizedModel(argument);
+  }
+}
+
+std::string SaveOptimizedModelPass::repr() const {
+  return "save_optimized_model_pass";
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
new file mode 100644
index 00000000000..5fc520a6bad
--- /dev/null
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * Save model optimized by ir pass
+ */
+class SaveOptimizedModelPass : public AnalysisPass {
+ public:
+  void RunImpl(Argument *argument) override;
+  std::string repr() const override;
+
+ private:
+  void SaveOptimizedModel(Argument *argument);
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index af7b8574b86..3ea996076c7 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -409,7 +409,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(model_dir_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
-
+  CP_MEMBER(save_optimized_model_);
   CP_MEMBER(opt_cache_dir_);
   CP_MEMBER(prog_file_);
   CP_MEMBER(params_file_);
@@ -1025,6 +1025,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << model_dir_;
   ss << prog_file_;
   ss << params_file_;
+  ss << save_optimized_model_;
 
   ss << use_gpu_;
   ss << enable_gpu_mixed_;
@@ -1347,6 +1348,8 @@ std::string AnalysisConfig::Summary() {
     os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
 
   // ir info
+  os.InsertRow(
+      {"save_optimized_model", save_optimized_model_ ? "true" : "false"});
   os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
   os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
   os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index d7f08cdf7ce..fe5c43a62b5 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1355,6 +1355,7 @@ void AnalysisPredictor::PrepareArgument() {
   // Analyze inference_program
   argument_->SetPredictorID(predictor_id_);
   argument_->SetRootPredictorID(root_predictor_id_);
+  argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
   argument_->SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_->SetModelDir(config_.model_dir());
@@ -1521,7 +1522,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
     argument_->SetXpuQuantPostDynamicWeightBits(
         config_.xpu_quant_post_dynamic_weight_bits_);
-    argument_->SetXpuQuantPostDynamicOpTypss(
+    argument_->SetXpuQuantPostDynamicOpTypes(
        config_.xpu_quant_post_dynamic_op_types_);
 #endif
 
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index b30838c3680..1f2a356229b 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void SetParamsFile(const std::string& x) { params_file_ = x; }
 
+  ///
+  /// \brief Save optimized model.
+  ///
+  /// \param save_optimized_model whether to enable save optimized model.
+  ///
+  void EnableSaveOptimModel(bool save_optimized_model) {
+    save_optimized_model_ = save_optimized_model;
+  }
   ///
   /// \brief Set the path of optimization cache directory.
   ///
@@ -1239,6 +1247,7 @@ struct PD_INFER_DECL AnalysisConfig {
   // Variables held by config can take up a lot of memory in some cases.
   // So we release the memory when the predictor is set up.
   mutable bool is_valid_{true};
+  bool save_optimized_model_{false};
   std::string opt_cache_dir_;
 
   friend class paddle_infer::experimental::InternalUtils;
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index db0048cc363..4ebd0dd5331 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
   std::vector<std::string> analysis_passes_{
       {"ir_graph_build_pass",
        "ir_analysis_pass",
+       "save_optimized_model_pass",
        "ir_params_sync_among_devices_pass",
        "adjust_cudnn_workspace_size_pass",
        "inference_op_replace_pass"}};
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 09e1e42cfbd..a2315774cf2 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -820,6 +820,9 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
+      .def("enable_save_optim_model",
+           &AnalysisConfig::EnableSaveOptimModel,
+           py::arg("save_optimized_model") = false)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
            &AnalysisConfig::SwitchUseFeedFetchOps,
diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc
index e6b5630dcca..d187fb9d173 100644
--- a/test/cpp/inference/api/analysis_predictor_tester.cc
+++ b/test/cpp/inference/api/analysis_predictor_tester.cc
@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
   inference::CompareTensor(outputs.front(), naive_outputs.front());
 }
 
+#ifdef PADDLE_WITH_XPU
+TEST(AnalysisPredictor, save_optimized_model_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
+  config.EnableSaveOptimModel(true);
+  config.EnableXpu();
+  config.SetXpuDeviceId(0);
+  LOG(INFO) << config.Summary();
+  CreatePaddlePredictor<AnalysisConfig>(config);
+}
+#endif
+
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);
-- 
GitLab
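
Usage sketch (not part of the patch): a minimal example of how the EnableSaveOptimModel switch added above could be exercised from the C++ inference API. It assumes an XPU build of Paddle Inference; the include path, the model directory "./mobilenet_v1", and the cache directory "./optim_cache" are illustrative assumptions, not taken from the patch.

    // Hedged sketch: enable the save-optimized-model switch introduced by this
    // patch. Assumes an XPU build; paths below are placeholders.
    #include "paddle_inference_api.h"  // include path depends on the install layout

    int main() {
      paddle_infer::Config config;
      config.SetModel("./mobilenet_v1");         // placeholder model directory
      config.SwitchIrOptim(true);                // IR optimization must stay on
      config.EnableXpu();                        // the pass currently runs for XPU targets
      config.SetXpuDeviceId(0);
      config.EnableSaveOptimModel(true);         // switch added by this patch
      config.SetOptimCacheDir("./optim_cache");  // optimized files land here
      // Building the predictor runs the analysis passes, including
      // save_optimized_model_pass, which writes _optimized.pdmodel and
      // _optimized.pdiparams into the cache dir (or the model dir if unset).
      auto predictor = paddle_infer::CreatePredictor(config);
      return 0;
    }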