PaddlePaddle / Paddle

Commit fa08a514 (unverified)
Authored May 19, 2023 by shentanyue; committed via GitHub on May 19, 2023.

[Inference] Save optimized model by pass (#53696)
Parent: 645e81f0

Showing 13 changed files with 228 additions and 4 deletions (+228 −4).
paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc            +2   -0
paddle/fluid/inference/analysis/argument.h                            +2   -1
paddle/fluid/inference/analysis/ir_pass_manager.cc                    +1   -1
paddle/fluid/inference/analysis/passes/CMakeLists.txt                 +5   -0
paddle/fluid/inference/analysis/passes/passes.cc                      +3   -0
paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc   +144 -0
paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h    +39  -0
paddle/fluid/inference/api/analysis_config.cc                         +4   -1
paddle/fluid/inference/api/analysis_predictor.cc                      +2   -1
paddle/fluid/inference/api/paddle_analysis_config.h                   +9   -0
paddle/fluid/inference/api/paddle_pass_builder.h                      +1   -0
paddle/fluid/pybind/inference_api.cc                                  +3   -0
test/cpp/inference/api/analysis_predictor_tester.cc                   +13  -0
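For orientation before the per-file diffs: the commit adds a save_optimized_model_pass analysis pass that, when enabled, serializes the IR-optimized program and its weights to disk. A minimal C++ sketch of how the new switch is exercised, mirroring the unit test added at the end of this commit (the include path and "model_dir" are placeholders, not part of the commit):

#include "paddle_inference_api.h"  // public inference header; exact path depends on the install

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model_dir");       // placeholder model directory
  config.SwitchIrOptim(true);         // the pass skips itself if ir_optim is off
  config.EnableSaveOptimModel(true);  // switch introduced by this commit
  config.EnableXpu();                 // RunImpl only saves when XPU is in use
  config.SetXpuDeviceId(0);
  // Building the predictor runs the analysis pipeline, including
  // save_optimized_model_pass, which writes _optimized.pdmodel and
  // _optimized.pdiparams next to the model (or into the optim cache dir).
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  return 0;
}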
paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc

@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
     Graph* graph,
     std::unordered_set<std::string>* reserved_persistable_node_names) const {
   for (auto* node : graph->Nodes()) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
     if (!node->IsVar() || !node->Var()->Persistable()) continue;
     for (auto* out_node : node->outputs) {
       auto op_type = out_node->Op()->Type();

@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
   std::unordered_set<const Node*> delete_nodes;
   const std::unordered_set<ir::Node*> nodes = graph->Nodes();
   for (auto* node : nodes) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
     if (!node->IsVar() || !node->Var()->Persistable()) continue;
     auto name = node->Var()->Name();
     if (reserved_persistable_node_names.count(name) > 0) continue;
paddle/fluid/inference/analysis/argument.h

@@ -146,6 +146,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
+  DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
   DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
   DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);

@@ -294,7 +295,7 @@ struct Argument {
                       XpuQuantPostDynamicWeightBits,
                       int);
   DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
-                      XpuQuantPostDynamicOpTypss,
+                      XpuQuantPostDynamicOpTypes,
                       std::vector<std::string>);
   DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -310,7 +310,7 @@ void IRPassManager::CreatePasses(Argument *argument,
     }
     bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
     pass->Set("use_fc_padding", new bool(use_fc_padding));
-  } else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
+  } else if (pass_name == "fused_multi_transformer_xpu_pass") {
     auto op_types = argument->xpu_quant_post_dynamic_op_types();
     if (std::count(op_types.begin(),
                    op_types.end(),
paddle/fluid/inference/analysis/passes/CMakeLists.txt

@@ -31,12 +31,17 @@ cc_library(
   inference_op_replace_pass
   SRCS inference_op_replace_pass.cc
   DEPS analysis_pass graph_to_program_pass)
+cc_library(
+  save_optimized_model_pass
+  SRCS save_optimized_model_pass.cc
+  DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)
 cc_library(
   analysis_passes
   SRCS passes.cc
   DEPS ir_graph_build_pass
        ir_analysis_pass
+       save_optimized_model_pass
        ir_params_sync_among_devices_pass
        adjust_cudnn_workspace_size_pass
        memory_optim_pass
paddle/fluid/inference/analysis/passes/passes.cc

@@ -21,6 +21,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"

 namespace paddle {
 namespace inference {

@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
   passes_.emplace("ir_graph_build_pass",
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
+  passes_.emplace("save_optimized_model_pass",
+                  std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
   passes_.emplace("memory_optimize_pass",
                   std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
   passes_.emplace(
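The registry above is a plain name-to-instance map, keyed by the pass name that the pass builder later requests. A self-contained sketch of the same pattern, using illustrative stand-in types rather than Paddle's actual classes:

#include <map>
#include <memory>
#include <string>

// Stand-ins for AnalysisPass and a concrete pass (illustrative only).
struct Pass {
  virtual ~Pass() = default;
  virtual void Run() = 0;
};
struct SaveModelPass : Pass {
  void Run() override { /* serialize the model here */ }
};

class Registry {
 public:
  Registry() {
    // Mirrors passes_.emplace("save_optimized_model_pass", ...) above.
    passes_.emplace("save_optimized_model_pass",
                    std::unique_ptr<Pass>(new SaveModelPass));
  }
  // Look a pass up by name; returns nullptr when it was never registered.
  Pass* Retrieve(const std::string& name) {
    auto it = passes_.find(name);
    return it == passes_.end() ? nullptr : it->second.get();
  }

 private:
  std::map<std::string, std::unique_ptr<Pass>> passes_;
};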
paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc (new file, mode 100644)

/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"

#include <unordered_set>

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace inference {
namespace analysis {

void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
  if (!argument->save_optimized_model()) {
    LOG(WARNING) << "save_optim_cache_model is turned off, skip "
                    "save_optimized_model_pass";
    return;
  }
  if (!argument->enable_ir_optim()) {
    LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
    return;
  }
  std::string model_opt_cache_dir = argument->optim_cache_dir();
  if (!model_opt_cache_dir.empty()) {
    if (!PathExists(model_opt_cache_dir)) {
      PADDLE_ENFORCE_NE(
          MKDIR(model_opt_cache_dir.c_str()),
          -1,
          platform::errors::PreconditionNotMet(
              "Can not create optimize cache directory: %s, Make sure you "
              "have permission to write",
              model_opt_cache_dir));
    }
  } else {
    model_opt_cache_dir = argument->Has("model_dir")
                              ? argument->model_dir()
                              : GetDirRoot(argument->model_program_path());
  }

  auto& scope = argument->scope();
  auto* graph = argument->main_graph_ptr();

  framework::ProgramDesc optimized_program_desc;
  framework::ir::GraphToProgram(*graph, &optimized_program_desc);

  auto IsPersistable = [](const framework::VarDesc* var) {
    if (var->Persistable() &&
        var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
        var->GetType() != framework::proto::VarType::FETCH_LIST &&
        var->GetType() != framework::proto::VarType::RAW) {
      return true;
    }
    return false;
  };

  auto SerializeParams = [&](const std::string& path) {
    framework::ProgramDesc save_program;
    auto* save_block = save_program.MutableBlock(0);
    std::unordered_set<std::string> save_var_set;
    for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
      const auto& global_block = optimized_program_desc.Block(i);
      for (framework::VarDesc* var : global_block.AllVars()) {
        if (IsPersistable(var)) {
          framework::VarDesc* new_var = save_block->Var(var->Name());
          new_var->SetShape(var->GetShape());
          new_var->SetDataType(var->GetDataType());
          new_var->SetType(var->GetType());
          new_var->SetLoDLevel(var->GetLoDLevel());
          new_var->SetPersistable(true);
          save_var_set.insert(new_var->Name());
        }
      }
    }

    std::string save_params_path = path + "/" + "_optimized.pdiparams";
    std::vector<std::string> save_var_list(save_var_set.begin(),
                                           save_var_set.end());
    std::sort(save_var_list.begin(), save_var_list.end());
    auto* op = save_block->AppendOp();
    op->SetType("save_combine");
    op->SetInput("X", save_var_list);
    op->SetAttr("file_path", save_params_path);
    op->CheckAttrs();

    framework::Executor exe(platform::CPUPlace{});
    exe.Run(save_program, &scope, 0, true, true);
  };

  // TODO(shentanyue01): Setting hardware and version identification for
  // optimized models.
  auto SerializeProg = [&](const std::string& path) {
    // All persistable var need to be moved to global block
    auto* global_block = optimized_program_desc.MutableBlock(0);
    for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
      const auto& sub_block = optimized_program_desc.Block(i);
      for (framework::VarDesc* var : sub_block.AllVars()) {
        if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
          framework::VarDesc* new_var = global_block->Var(var->Name());
          new_var->SetShape(var->GetShape());
          new_var->SetDataType(var->GetDataType());
          new_var->SetType(var->GetType());
          new_var->SetLoDLevel(var->GetLoDLevel());
          new_var->SetPersistable(true);
        }
      }
    }
    std::string save_model_path = path + "/" + "_optimized.pdmodel";
    auto str = optimized_program_desc.Proto()->SerializeAsString();
    std::ofstream file(save_model_path.c_str(), std::ios::binary);
    file.write(str.c_str(), str.size());
    file.close();
  };

  SerializeProg(model_opt_cache_dir);
  SerializeParams(model_opt_cache_dir);
  LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}

void SaveOptimizedModelPass::RunImpl(Argument* argument) {
  if (argument->use_xpu_valid()) {
    SaveOptimizedModel(argument);
  }
}

std::string SaveOptimizedModelPass::repr() const {
  return "save_optimized_model_pass";
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
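Note the two lambdas: SerializeProg hoists persistable vars from sub-blocks into the global block and dumps the ProgramDesc proto, while SerializeParams builds a one-op program around save_combine and executes it on CPU to write all weights into a single file. Once the pass has run, the artifacts can presumably be reloaded like any combined model; a hedged sketch using the two-argument AnalysisConfig::SetModel overload ("cache_dir" stands in for whatever directory the pass wrote to):

paddle::AnalysisConfig config;
// File names come from SerializeProg/SerializeParams above.
config.SetModel("cache_dir/_optimized.pdmodel",
                "cache_dir/_optimized.pdiparams");
auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);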
paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h (new file, mode 100644)

/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>

#include "paddle/fluid/inference/analysis/analysis_pass.h"

namespace paddle {
namespace inference {
namespace analysis {

/*
 * Save model optimized by ir pass
 */
class SaveOptimizedModelPass : public AnalysisPass {
 public:
  void RunImpl(Argument* argument) override;

  std::string repr() const override;

 private:
  void SaveOptimizedModel(Argument* argument);
};

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/api/analysis_config.cc

@@ -409,7 +409,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(model_dir_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
+  CP_MEMBER(save_optimized_model_);
   CP_MEMBER(opt_cache_dir_);
   CP_MEMBER(prog_file_);
   CP_MEMBER(params_file_);

@@ -1025,6 +1025,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << model_dir_;
   ss << prog_file_;
   ss << params_file_;
+  ss << save_optimized_model_;
   ss << use_gpu_;
   ss << enable_gpu_mixed_;

@@ -1347,6 +1348,8 @@ std::string AnalysisConfig::Summary() {
   os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
   // ir info
+  os.InsertRow(
+      {"save_optimized_model", save_optimized_model_ ? "true" : "false"});
   os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
   os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
   os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
paddle/fluid/inference/api/analysis_predictor.cc

@@ -1355,6 +1355,7 @@ void AnalysisPredictor::PrepareArgument() {
   // Analyze inference_program
   argument_->SetPredictorID(predictor_id_);
   argument_->SetRootPredictorID(root_predictor_id_);
+  argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
   argument_->SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_->SetModelDir(config_.model_dir());

@@ -1521,7 +1522,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
   argument_->SetXpuQuantPostDynamicWeightBits(
       config_.xpu_quant_post_dynamic_weight_bits_);
-  argument_->SetXpuQuantPostDynamicOpTypss(
+  argument_->SetXpuQuantPostDynamicOpTypes(
       config_.xpu_quant_post_dynamic_op_types_);
 #endif
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void SetParamsFile(const std::string& x) { params_file_ = x; }
+  ///
+  /// \brief Save optimized model.
+  ///
+  /// \param save_optimized_model whether to enable save optimized model.
+  ///
+  void EnableSaveOptimModel(bool save_optimized_model) {
+    save_optimized_model_ = save_optimized_model;
+  }
   ///
   /// \brief Set the path of optimization cache directory.
   ///

@@ -1239,6 +1247,7 @@ struct PD_INFER_DECL AnalysisConfig {
   // Variables held by config can take up a lot of memory in some cases.
   // So we release the memory when the predictor is set up.
   mutable bool is_valid_{true};
+  bool save_optimized_model_{false};
   std::string opt_cache_dir_;
   friend class paddle_infer::experimental::InternalUtils;
paddle/fluid/inference/api/paddle_pass_builder.h

@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
   std::vector<std::string> analysis_passes_{
       {"ir_graph_build_pass",
        "ir_analysis_pass",
+       "save_optimized_model_pass",
        "ir_params_sync_among_devices_pass",
        "adjust_cudnn_workspace_size_pass",
        "inference_op_replace_pass"}};
paddle/fluid/pybind/inference_api.cc

@@ -820,6 +820,9 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
+      .def("enable_save_optim_model",
+           &AnalysisConfig::EnableSaveOptimModel,
+           py::arg("save_optimized_model") = false)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
            &AnalysisConfig::SwitchUseFeedFetchOps,
test/cpp/inference/api/analysis_predictor_tester.cc

@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
   inference::CompareTensor(outputs.front(), naive_outputs.front());
 }

+#ifdef PADDLE_WITH_XPU
+TEST(AnalysisPredictor, save_optimized_model_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
+  config.EnableSaveOptimModel(true);
+  config.EnableXpu();
+  config.SetXpuDeviceId(0);
+  LOG(INFO) << config.Summary();
+  CreatePaddlePredictor<AnalysisConfig>(config);
+}
+#endif
+
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);