Unverified · Commit fa08a514 authored by: S shentanyue, committed by: GitHub

[Inference] Save optimized model by pass (#53696)

Parent 645e81f0
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
    Graph* graph,
    std::unordered_set<std::string>* reserved_persistable_node_names) const {
  for (auto* node : graph->Nodes()) {
+   if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
    if (!node->IsVar() || !node->Var()->Persistable()) continue;
    for (auto* out_node : node->outputs) {
      auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
  std::unordered_set<const Node*> delete_nodes;
  const std::unordered_set<ir::Node*> nodes = graph->Nodes();
  for (auto* node : nodes) {
+   if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
    if (!node->IsVar() || !node->Var()->Persistable()) continue;
    auto name = node->Var()->Name();
    if (reserved_persistable_node_names.count(name) > 0) continue;
......
@@ -146,6 +146,7 @@ struct Argument {
  DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
  DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
  DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
+ DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
  DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
  DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
@@ -294,7 +295,7 @@ struct Argument {
                      XpuQuantPostDynamicWeightBits,
                      int);
  DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
-                     XpuQuantPostDynamicOpTypss,
+                     XpuQuantPostDynamicOpTypes,
                      std::vector<std::string>);
  DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
......
@@ -310,7 +310,7 @@ void IRPassManager::CreatePasses(Argument *argument,
      }
      bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
      pass->Set("use_fc_padding", new bool(use_fc_padding));
-   } else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
+   } else if (pass_name == "fused_multi_transformer_xpu_pass") {
      auto op_types = argument->xpu_quant_post_dynamic_op_types();
      if (std::count(op_types.begin(),
                     op_types.end(),
......
@@ -31,12 +31,17 @@ cc_library(
  inference_op_replace_pass
  SRCS inference_op_replace_pass.cc
  DEPS analysis_pass graph_to_program_pass)
+cc_library(
+  save_optimized_model_pass
+  SRCS save_optimized_model_pass.cc
+  DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)
cc_library(
  analysis_passes
  SRCS passes.cc
  DEPS ir_graph_build_pass
       ir_analysis_pass
+      save_optimized_model_pass
       ir_params_sync_among_devices_pass
       adjust_cudnn_workspace_size_pass
       memory_optim_pass
......
@@ -21,6 +21,7 @@
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
namespace paddle {
namespace inference {
@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
                  std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
  passes_.emplace("ir_graph_build_pass",
                  std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
+ passes_.emplace("save_optimized_model_pass",
+                 std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
  passes_.emplace("memory_optimize_pass",
                  std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
  passes_.emplace(
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
#include <unordered_set>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace inference {
namespace analysis {
void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
if (!argument->save_optimized_model()) {
LOG(WARNING) << "save_optim_cache_model is turned off, skip "
"save_optimized_model_pass";
return;
}
if (!argument->enable_ir_optim()) {
LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
return;
}
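  // Decide where to write the optimized model: prefer the user-specified
  // optimization cache directory (created on demand below); otherwise fall
  // back to the directory the original model was loaded from.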
std::string model_opt_cache_dir = argument->optim_cache_dir();
if (!model_opt_cache_dir.empty()) {
if (!PathExists(model_opt_cache_dir)) {
PADDLE_ENFORCE_NE(
MKDIR(model_opt_cache_dir.c_str()),
-1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
model_opt_cache_dir));
}
} else {
model_opt_cache_dir = argument->Has("model_dir")
? argument->model_dir()
: GetDirRoot(argument->model_program_path());
}
auto& scope = argument->scope();
auto* graph = argument->main_graph_ptr();
framework::ProgramDesc optimized_program_desc;
framework::ir::GraphToProgram(*graph, &optimized_program_desc);
auto IsPersistable = [](const framework::VarDesc* var) {
if (var->Persistable() &&
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var->GetType() != framework::proto::VarType::FETCH_LIST &&
var->GetType() != framework::proto::VarType::RAW) {
return true;
}
return false;
};
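  // Collect every persistable variable from the optimized program into a
  // fresh program holding a single save_combine op, then execute it on CPU
  // so all parameters are dumped into one "_optimized.pdiparams" file.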
auto SerializeParams = [&](const std::string& path) {
framework::ProgramDesc save_program;
auto* save_block = save_program.MutableBlock(0);
std::unordered_set<std::string> save_var_set;
for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
const auto& global_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : global_block.AllVars()) {
if (IsPersistable(var)) {
framework::VarDesc* new_var = save_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
save_var_set.insert(new_var->Name());
}
}
}
std::string save_params_path = path + "/" + "_optimized.pdiparams";
std::vector<std::string> save_var_list(save_var_set.begin(),
save_var_set.end());
std::sort(save_var_list.begin(), save_var_list.end());
auto* op = save_block->AppendOp();
op->SetType("save_combine");
op->SetInput("X", save_var_list);
op->SetAttr("file_path", save_params_path);
op->CheckAttrs();
framework::Executor exe(platform::CPUPlace{});
exe.Run(save_program, &scope, 0, true, true);
};
// TODO(shentanyue01): Setting hardware and version identification for
// optimized models.
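  // Hoist persistable variables from the sub-blocks into block 0 so the
  // saved program is self-contained, then serialize the ProgramDesc proto
  // to "_optimized.pdmodel".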
auto SerializeProg = [&](const std::string& path) {
// All persistable var need to be moved to global block
auto* global_block = optimized_program_desc.MutableBlock(0);
for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
const auto& sub_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : sub_block.AllVars()) {
if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
framework::VarDesc* new_var = global_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
}
}
}
std::string save_model_path = path + "/" + "_optimized.pdmodel";
auto str = optimized_program_desc.Proto()->SerializeAsString();
std::ofstream file(save_model_path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
};
SerializeProg(model_opt_cache_dir);
SerializeParams(model_opt_cache_dir);
LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}
void SaveOptimizedModelPass::RunImpl(Argument* argument) {
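  // For now the optimized model is only saved when an XPU config is present;
  // other targets skip this pass.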
if (argument->use_xpu_valid()) {
SaveOptimizedModel(argument);
}
}
std::string SaveOptimizedModelPass::repr() const {
return "save_optimized_model_pass";
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Save model optimized by ir pass
*/
class SaveOptimizedModelPass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;
private:
void SaveOptimizedModel(Argument *argument);
};
} // namespace analysis
} // namespace inference
} // namespace paddle
@@ -409,7 +409,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
  CP_MEMBER(model_dir_);
  CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                  // params_file_ fields.
+ CP_MEMBER(save_optimized_model_);
  CP_MEMBER(opt_cache_dir_);
  CP_MEMBER(prog_file_);
  CP_MEMBER(params_file_);
@@ -1025,6 +1025,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
  ss << model_dir_;
  ss << prog_file_;
  ss << params_file_;
+ ss << save_optimized_model_;
  ss << use_gpu_;
  ss << enable_gpu_mixed_;
@@ -1347,6 +1348,8 @@ std::string AnalysisConfig::Summary() {
  os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
  // ir info
+ os.InsertRow(
+     {"save_optimized_model", save_optimized_model_ ? "true" : "false"});
  os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
  os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
  os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
......
@@ -1355,6 +1355,7 @@ void AnalysisPredictor::PrepareArgument() {
  // Analyze inference_program
  argument_->SetPredictorID(predictor_id_);
  argument_->SetRootPredictorID(root_predictor_id_);
+ argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
  argument_->SetOptimCacheDir(config_.opt_cache_dir_);
  if (!config_.model_dir().empty()) {
    argument_->SetModelDir(config_.model_dir());
@@ -1521,7 +1522,7 @@ void AnalysisPredictor::PrepareArgument() {
    argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
    argument_->SetXpuQuantPostDynamicWeightBits(
        config_.xpu_quant_post_dynamic_weight_bits_);
-   argument_->SetXpuQuantPostDynamicOpTypss(
+   argument_->SetXpuQuantPostDynamicOpTypes(
        config_.xpu_quant_post_dynamic_op_types_);
#endif
......
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
  ///
  void SetParamsFile(const std::string& x) { params_file_ = x; }
+ ///
+ /// \brief Save optimized model.
+ ///
+ /// \param save_optimized_model whether to enable save optimized model.
+ ///
+ void EnableSaveOptimModel(bool save_optimized_model) {
+   save_optimized_model_ = save_optimized_model;
+ }
  ///
  /// \brief Set the path of optimization cache directory.
  ///
@@ -1239,6 +1247,7 @@ struct PD_INFER_DECL AnalysisConfig {
  // Variables held by config can take up a lot of memory in some cases.
  // So we release the memory when the predictor is set up.
  mutable bool is_valid_{true};
+ bool save_optimized_model_{false};
  std::string opt_cache_dir_;
  friend class paddle_infer::experimental::InternalUtils;
......
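For reference, a minimal C++ usage sketch of the new switch. This is a sketch only: the include path and the model/cache paths are placeholders, while EnableSaveOptimModel, SetOptimCacheDir, SwitchIrOptim, EnableXpu, SetXpuDeviceId, and CreatePaddlePredictor all appear either in this change or in the surrounding AnalysisConfig API.

#include "paddle_inference_api.h"  // header name assumed from the Paddle Inference C++ API

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");          // placeholder model directory
  config.SwitchIrOptim(true);                     // the pass skips saving when ir_optim is off
  config.EnableSaveOptimModel(true);              // new switch added in this commit
  config.SetOptimCacheDir("/path/to/opt_cache");  // optional; the pass falls back to the model dir
  config.EnableXpu();                             // RunImpl currently saves only for XPU configs
  config.SetXpuDeviceId(0);
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  // After analysis, _optimized.pdmodel and _optimized.pdiparams are written
  // to the chosen directory.
  return 0;
}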
@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
  std::vector<std::string> analysis_passes_{
      {"ir_graph_build_pass",
       "ir_analysis_pass",
+      "save_optimized_model_pass",
       "ir_params_sync_among_devices_pass",
       "adjust_cudnn_workspace_size_pass",
       "inference_op_replace_pass"}};
......
@@ -820,6 +820,9 @@ void BindAnalysisConfig(py::module *m) {
      .def("enable_profile", &AnalysisConfig::EnableProfile)
      .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
      .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
+     .def("enable_save_optim_model",
+          &AnalysisConfig::EnableSaveOptimModel,
+          py::arg("save_optimized_model") = false)
      .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
      .def("switch_use_feed_fetch_ops",
           &AnalysisConfig::SwitchUseFeedFetchOps,
......
@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
  inference::CompareTensor(outputs.front(), naive_outputs.front());
}
+#ifdef PADDLE_WITH_XPU
+TEST(AnalysisPredictor, save_optimized_model_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
+  config.EnableSaveOptimModel(true);
+  config.EnableXpu();
+  config.SetXpuDeviceId(0);
+  LOG(INFO) << config.Summary();
+  CreatePaddlePredictor<AnalysisConfig>(config);
+}
+#endif
TEST(AnalysisPredictor, ZeroCopy) {
  AnalysisConfig config;
  config.SetModel(FLAGS_dirname);
......