Unverified commit fa08a514 authored by shentanyue, committed by GitHub

[Inference] Save optimized model by pass (#53696)

Parent 645e81f0
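This commit wires a new save_optimized_model_pass into the inference analysis pipeline and exposes it through AnalysisConfig::EnableSaveOptimModel. A minimal usage sketch (not part of the commit) of how the feature could be turned on from the C++ API — the model and cache paths are placeholders, and, as the diff below shows, the pass is skipped when IR optimization is off and currently only runs when XPU is enabled:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");          // placeholder model directory
  config.SwitchIrOptim(true);                     // pass is skipped when IR optim is off
  config.EnableSaveOptimModel(true);              // new switch added by this commit
  config.SetOptimCacheDir("/path/to/opt_cache");  // optional; falls back to the model dir
  config.EnableXpu();                             // pass currently runs only on XPU
  config.SetXpuDeviceId(0);
  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  return 0;
}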
......@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
Graph* graph,
std::unordered_set<std::string>* reserved_persistable_node_names) const {
for (auto* node : graph->Nodes()) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
for (auto* out_node : node->outputs) {
auto op_type = out_node->Op()->Type();
......@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
std::unordered_set<const Node*> delete_nodes;
const std::unordered_set<ir::Node*> nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
auto name = node->Var()->Name();
if (reserved_persistable_node_names.count(name) > 0) continue;
......
......@@ -146,6 +146,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
......@@ -294,7 +295,7 @@ struct Argument {
XpuQuantPostDynamicWeightBits,
int);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
XpuQuantPostDynamicOpTypss,
XpuQuantPostDynamicOpTypes,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
......
......@@ -310,7 +310,7 @@ void IRPassManager::CreatePasses(Argument *argument,
}
bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
pass->Set("use_fc_padding", new bool(use_fc_padding));
} else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
} else if (pass_name == "fused_multi_transformer_xpu_pass") {
auto op_types = argument->xpu_quant_post_dynamic_op_types();
if (std::count(op_types.begin(),
op_types.end(),
......
......@@ -31,12 +31,17 @@ cc_library(
inference_op_replace_pass
SRCS inference_op_replace_pass.cc
DEPS analysis_pass graph_to_program_pass)
cc_library(
save_optimized_model_pass
SRCS save_optimized_model_pass.cc
DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)
cc_library(
analysis_passes
SRCS passes.cc
DEPS ir_graph_build_pass
ir_analysis_pass
save_optimized_model_pass
ir_params_sync_among_devices_pass
adjust_cudnn_workspace_size_pass
memory_optim_pass
......
......@@ -21,6 +21,7 @@
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
namespace paddle {
namespace inference {
......@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
passes_.emplace("ir_graph_build_pass",
std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
passes_.emplace("save_optimized_model_pass",
std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
passes_.emplace("memory_optimize_pass",
std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
passes_.emplace(
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
#include <unordered_set>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace inference {
namespace analysis {
void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
if (!argument->save_optimized_model()) {
LOG(WARNING) << "save_optim_cache_model is turned off, skip "
"save_optimized_model_pass";
return;
}
if (!argument->enable_ir_optim()) {
LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
return;
}
std::string model_opt_cache_dir = argument->optim_cache_dir();
if (!model_opt_cache_dir.empty()) {
if (!PathExists(model_opt_cache_dir)) {
PADDLE_ENFORCE_NE(
MKDIR(model_opt_cache_dir.c_str()),
-1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
model_opt_cache_dir));
}
} else {
model_opt_cache_dir = argument->Has("model_dir")
? argument->model_dir()
: GetDirRoot(argument->model_program_path());
}
auto& scope = argument->scope();
auto* graph = argument->main_graph_ptr();
framework::ProgramDesc optimized_program_desc;
framework::ir::GraphToProgram(*graph, &optimized_program_desc);
auto IsPersistable = [](const framework::VarDesc* var) {
if (var->Persistable() &&
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var->GetType() != framework::proto::VarType::FETCH_LIST &&
var->GetType() != framework::proto::VarType::RAW) {
return true;
}
return false;
};
auto SerializeParams = [&](const std::string& path) {
framework::ProgramDesc save_program;
auto* save_block = save_program.MutableBlock(0);
std::unordered_set<std::string> save_var_set;
for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
const auto& global_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : global_block.AllVars()) {
if (IsPersistable(var)) {
framework::VarDesc* new_var = save_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
save_var_set.insert(new_var->Name());
}
}
}
std::string save_params_path = path + "/" + "_optimized.pdiparams";
std::vector<std::string> save_var_list(save_var_set.begin(),
save_var_set.end());
std::sort(save_var_list.begin(), save_var_list.end());
auto* op = save_block->AppendOp();
op->SetType("save_combine");
op->SetInput("X", save_var_list);
op->SetAttr("file_path", save_params_path);
op->CheckAttrs();
framework::Executor exe(platform::CPUPlace{});
exe.Run(save_program, &scope, 0, true, true);
};
// TODO(shentanyue01): Set hardware and version identification for
// optimized models.
auto SerializeProg = [&](const std::string& path) {
// All persistable vars need to be moved to the global block
auto* global_block = optimized_program_desc.MutableBlock(0);
for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
const auto& sub_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : sub_block.AllVars()) {
if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
framework::VarDesc* new_var = global_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
}
}
}
std::string save_model_path = path + "/" + "_optimized.pdmodel";
auto str = optimized_program_desc.Proto()->SerializeAsString();
std::ofstream file(save_model_path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
};
SerializeProg(model_opt_cache_dir);
SerializeParams(model_opt_cache_dir);
LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}
void SaveOptimizedModelPass::RunImpl(Argument* argument) {
if (argument->use_xpu_valid()) {
SaveOptimizedModel(argument);
}
}
std::string SaveOptimizedModelPass::repr() const {
return "save_optimized_model_pass";
}
} // namespace analysis
} // namespace inference
} // namespace paddle
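For context (not part of the commit), a hedged sketch of how the two artifacts written above, _optimized.pdmodel and _optimized.pdiparams, could be loaded back for inference; the cache directory is a placeholder and the two-argument SetModel overload is assumed from the existing AnalysisConfig API:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::AnalysisConfig MakeReloadConfig() {
  paddle::AnalysisConfig config;
  // File names follow SerializeProg/SerializeParams above; the directory is a placeholder.
  config.SetModel("/path/to/opt_cache/_optimized.pdmodel",
                  "/path/to/opt_cache/_optimized.pdiparams");
  // The program has already been optimized, so IR optimization can be skipped on reload.
  config.SwitchIrOptim(false);
  return config;
}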
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Save the model optimized by IR passes
*/
class SaveOptimizedModelPass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;
private:
void SaveOptimizedModel(Argument *argument);
};
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -409,7 +409,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_dir_);
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.
CP_MEMBER(save_optimized_model_);
CP_MEMBER(opt_cache_dir_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
......@@ -1025,6 +1025,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << model_dir_;
ss << prog_file_;
ss << params_file_;
ss << save_optimized_model_;
ss << use_gpu_;
ss << enable_gpu_mixed_;
......@@ -1347,6 +1348,8 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
// ir info
os.InsertRow(
{"save_optimized_model", save_optimized_model_ ? "true" : "false"});
os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
......
......@@ -1355,6 +1355,7 @@ void AnalysisPredictor::PrepareArgument() {
// Analyze inference_program
argument_->SetPredictorID(predictor_id_);
argument_->SetRootPredictorID(root_predictor_id_);
argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
argument_->SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
argument_->SetModelDir(config_.model_dir());
......@@ -1521,7 +1522,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_->SetXpuQuantPostDynamicWeightBits(
config_.xpu_quant_post_dynamic_weight_bits_);
argument_->SetXpuQuantPostDynamicOpTypss(
argument_->SetXpuQuantPostDynamicOpTypes(
config_.xpu_quant_post_dynamic_op_types_);
#endif
......
......@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void SetParamsFile(const std::string& x) { params_file_ = x; }
///
/// \brief Save optimized model.
///
/// \param save_optimized_model whether to enable save optimized model.
///
void EnableSaveOptimModel(bool save_optimized_model) {
save_optimized_model_ = save_optimized_model;
}
///
/// \brief Set the path of optimization cache directory.
///
......@@ -1239,6 +1247,7 @@ struct PD_INFER_DECL AnalysisConfig {
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
bool save_optimized_model_{false};
std::string opt_cache_dir_;
friend class paddle_infer::experimental::InternalUtils;
......
......@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
std::vector<std::string> analysis_passes_{
{"ir_graph_build_pass",
"ir_analysis_pass",
"save_optimized_model_pass",
"ir_params_sync_among_devices_pass",
"adjust_cudnn_workspace_size_pass",
"inference_op_replace_pass"}};
......
......@@ -820,6 +820,9 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_profile", &AnalysisConfig::EnableProfile)
.def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
.def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
.def("enable_save_optim_model",
&AnalysisConfig::EnableSaveOptimModel,
py::arg("save_optimized_model") = false)
.def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
.def("switch_use_feed_fetch_ops",
&AnalysisConfig::SwitchUseFeedFetchOps,
......
......@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
inference::CompareTensor(outputs.front(), naive_outputs.front());
}
#ifdef PADDLE_WITH_XPU
TEST(AnalysisPredictor, save_optimized_model_on) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
config.SwitchIrOptim(true);
config.EnableSaveOptimModel(true);
config.EnableXpu();
config.SetXpuDeviceId(0);
LOG(INFO) << config.Summary();
CreatePaddlePredictor<AnalysisConfig>(config);
}
#endif
TEST(AnalysisPredictor, ZeroCopy) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
......