BaiXuePrincess / Paddle
Forked from PaddlePaddle / Paddle (in sync with the upstream project)
Commit aa892113 (unverified)
Authored by Wilber on Dec 01, 2022; committed via GitHub on Dec 01, 2022
[Inference] Optimize memory_optimize pass. (#48476)
* update memory_optimize pass
Parent: 93099bb8
Showing 10 changed files with 183 additions and 14 deletions (+183, -14):
paddle/fluid/framework/naive_executor.cc                         +64  -3
paddle/fluid/framework/naive_executor.h                          +10  -1
paddle/fluid/inference/analysis/CMakeLists.txt                    +1  -1
paddle/fluid/inference/analysis/argument.h                        +1  -0
paddle/fluid/inference/analysis/ir_pass_manager.cc                +2  -0
paddle/fluid/inference/analysis/pass_result_info.cc (new)        +15  -0
paddle/fluid/inference/analysis/pass_result_info.h (new)         +66  -0
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc    +7  -2
paddle/fluid/inference/api/analysis_predictor.cc                 +16  -6
paddle/fluid/inference/api/analysis_predictor.h                   +1  -1
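In outline (as reconstructed from the diff below), the patch replaces compile-time variable renaming with runtime buffer sharing. memory_optimize_pass now only computes the reuse mapping (variable name to the representative variable whose buffer it reuses) and publishes it through a new PassResultInfoForRuntime singleton, keyed by the predictor's root_predictor_id. When the predictor is prepared, AnalysisPredictor::PrepareExecutor() fetches that table and hands it to NaiveExecutor::MakeReusePlan(), which groups variables into clusters with one shared buffer slot per cluster. During NaiveExecutor::Run(), each op's clustered output tensors share the cluster's current buffer before the op executes, and afterwards the slot is updated to point at the largest tensor seen so far. Because variable names in the program are left untouched, output hooks no longer conflict with memory reuse, which is why the EnableMemoryOptim warning in RegisterOutputHook is dropped below.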
paddle/fluid/framework/naive_executor.cc
```diff
@@ -15,8 +15,11 @@
 #include "paddle/fluid/framework/naive_executor.h"

 #include <string>
+#include <unordered_map>
+#include <unordered_set>

 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/platform/denormal.h"
 #ifdef PADDLE_WITH_MKLDNN
```
```diff
@@ -61,12 +64,31 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
 #endif
+    // According to reuse table, we share the out tensor's holder.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        it.first->ShareBufferWith(*cluster_buffer_[it.second]);
+      }
+    }
     op->Run(*scope_, place_);
+    // Update the shared_holder so that only records the max one.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        if (it.first->memory_size() >
+            cluster_buffer_[it.second]->memory_size()) {
+          cluster_buffer_[it.second] = it.first;
+        }
+      }
+    }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    if (hookfunc_) {
-      hookfunc_(op.get());
+    for (auto &func : hookfunc_) {
+      func(op.get());
     }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
```
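To make the hunk above concrete, here is a minimal standalone sketch of the grow-only cluster buffer, using a hypothetical Tensor type in place of phi::DenseTensor (whose real ShareBufferWith and memory_size are assumed to behave analogously):

```cpp
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

// Hypothetical stand-in for phi::DenseTensor: a shared allocation plus size.
struct Tensor {
  std::shared_ptr<std::vector<char>> holder;
  size_t memory_size() const { return holder ? holder->size() : 0; }
  void ShareBufferWith(const Tensor &other) { holder = other.holder; }
};

int main() {
  // Two outputs placed in the same reuse cluster (non-overlapping lifetimes).
  Tensor a, b;
  a.holder = std::make_shared<std::vector<char>>(256);  // first op's output
  b.holder = std::make_shared<std::vector<char>>(64);   // second op's output

  Tensor *cluster_buffer = &a;  // largest tensor recorded so far

  // Before the second op runs: its output shares the cluster's holder,
  // so b now reuses a's 256-byte allocation instead of its own 64 bytes.
  b.ShareBufferWith(*cluster_buffer);

  // After the op runs: keep whichever tensor is larger as the cluster's
  // buffer, so the shared allocation only ever grows.
  if (b.memory_size() > cluster_buffer->memory_size()) cluster_buffer = &b;

  std::cout << cluster_buffer->memory_size() << "\n";  // prints 256
}
```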
```diff
@@ -146,7 +168,46 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }

 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_ = hookfunc;
+  hookfunc_.push_back(hookfunc);
+}
+
+void NaiveExecutor::MakeReusePlan(
+    const std::unordered_map<std::string, std::string> &reuse_table) {
+  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
+  for (auto &it : reuse_table) {
+    clusters[it.second].insert(it.first);
+  }
+
+  std::vector<std::string> cluster_names;
+  for (auto &it : clusters) {
+    cluster_names.push_back(it.first);
+  }
+  cluster_buffer_.resize(cluster_names.size());
+
+  for (auto &op : ops_) {
+    for (auto &name : op->OutputVars(true)) {
+      if (reuse_table.count(name)) {
+        const auto &reuse_name = reuse_table.at(name);
+        auto it =
+            std::find(cluster_names.begin(), cluster_names.end(), reuse_name);
+        int idx = it - cluster_names.begin();
+        auto *var = scope_->FindVar(name);
+        auto *reuse_var = scope_->FindVar(reuse_name);
+        if (var && reuse_var && var->IsType<phi::DenseTensor>() &&
+            reuse_var->IsType<phi::DenseTensor>()) {
+          auto *tensor = var->GetMutable<phi::DenseTensor>();
+          auto *reuse_tensor = reuse_var->GetMutable<phi::DenseTensor>();
+          cluster_buffer_[idx] = reuse_tensor;
+          if (reuse_cache_.count(op.get())) {
+            reuse_cache_[op.get()].emplace(tensor, idx);
+          } else {
+            reuse_cache_[op.get()] =
+                std::unordered_map<phi::DenseTensor *, int>{{tensor, idx}};
+          }
+        }
+      }
+    }
+  }
 }

 NaiveExecutor::~NaiveExecutor() {
```
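The clustering step above can be exercised in isolation. A small sketch with pure STL containers and made-up variable names (conv_out, relu_out, and friends are illustrative, not from the patch):

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

int main() {
  // reuse_table maps each variable to the representative variable whose
  // buffer it reuses, exactly the shape MakeReusePlan receives.
  std::unordered_map<std::string, std::string> reuse_table = {
      {"relu_out", "conv_out"},   // relu_out reuses conv_out's buffer
      {"scale_out", "conv_out"},  // so does scale_out
      {"fc_out", "matmul_out"}};  // fc_out reuses matmul_out's buffer

  // Invert it: representative -> set of variables in its cluster.
  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
  for (auto &it : reuse_table) clusters[it.second].insert(it.first);

  // One buffer slot per cluster; a variable's slot index is the position of
  // its representative in cluster_names, found with std::find as in the patch.
  std::vector<std::string> cluster_names;
  for (auto &it : clusters) cluster_names.push_back(it.first);

  auto pos = std::find(cluster_names.begin(), cluster_names.end(),
                       reuse_table.at("relu_out"));
  std::cout << "relu_out -> slot " << (pos - cluster_names.begin()) << "\n";
}
```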
paddle/fluid/framework/naive_executor.h
```diff
@@ -17,6 +17,7 @@
 #include <functional>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>

 #include "paddle/fluid/framework/operator.h"
```
```diff
@@ -67,6 +68,9 @@ class NaiveExecutor {
   Scope *GetScope() { return scope_; }

+  void MakeReusePlan(
+      const std::unordered_map<std::string, std::string> &reuse_table);
+
   void ResetTrtOps(int num);

   void RegisterOutputHook(const HookFunc &hookfunc);
```
```diff
@@ -82,7 +86,12 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;

   Scope *scope_{nullptr};

-  HookFunc hookfunc_{nullptr};
+  std::vector<HookFunc> hookfunc_;
+
+  // Record information that tensor_a should ShareBufferWith tensor_b.
+  std::unordered_map<OperatorBase *,
+                     std::unordered_map<phi::DenseTensor *, int>>
+      reuse_cache_;
+  std::vector<phi::DenseTensor *> cluster_buffer_;
 };

 }  // namespace framework
```
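The header change turns the single output hook into a list, so every registered callback now fires after each op instead of only the last one registered. A minimal sketch of that behavior, with a hypothetical Op type standing in for OperatorBase:

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Op { std::string type; };        // stand-in for OperatorBase
using HookFunc = std::function<void(Op *)>;

int main() {
  std::vector<HookFunc> hooks;  // was: a single HookFunc hookfunc_{nullptr}
  hooks.push_back([](Op *op) { std::cout << "profile " << op->type << "\n"; });
  hooks.push_back([](Op *op) { std::cout << "dump    " << op->type << "\n"; });

  Op conv{"conv2d"};
  for (auto &func : hooks) func(&conv);  // mirrors the loop in Run()
}
```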
paddle/fluid/inference/analysis/CMakeLists.txt
```diff
@@ -20,7 +20,7 @@ cc_library(
 cc_library(
   ir_pass_manager
-  SRCS ir_pass_manager.cc
+  SRCS ir_pass_manager.cc pass_result_info.cc
   DEPS graph pass ${INFER_IR_PASSES} analysis_helper)

 cc_library(
```
paddle/fluid/inference/analysis/argument.h
```diff
@@ -139,6 +139,7 @@ struct Argument {
   unique_ptr_t field__##_;

   DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
+  DECL_ARGUMENT_FIELD(root_predictor_id, RootPredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
```
paddle/fluid/inference/analysis/ir_pass_manager.cc
```diff
@@ -229,6 +229,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                         argument->dlnne_input_shape_dict()));
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+    } else if (pass_name == "memory_optimize_pass") {
+      pass->Set("root_predictor_id", new int(argument->root_predictor_id()));
     }

     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
```
paddle/fluid/inference/analysis/pass_result_info.cc (new file, mode 100644)

```cpp
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/pass_result_info.h"
```
paddle/fluid/inference/analysis/pass_result_info.h (new file, mode 100644)

```cpp
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "paddle/phi/core/enforce.h"
#include "paddle/utils/variant.h"

namespace paddle {
namespace inference {
namespace analysis {

class PassResultInfoForRuntime {
 public:
  using PassInfo =
      paddle::variant<std::string,
                      std::vector<std::string>,
                      std::unordered_map<std::string, std::string>>;

  static PassResultInfoForRuntime *Instance() {
    static PassResultInfoForRuntime info;
    return &info;
  }

  template <typename T>
  void Set(int predictor_id, const std::string &pass_name, T infos) {
    map[predictor_id].emplace(pass_name, infos);
  }

  template <typename T>
  T Get(int predictor_id, const std::string &pass_name) {
    PADDLE_ENFORCE_EQ(
        map.count(predictor_id) && map[predictor_id].count(pass_name),
        true,
        phi::errors::InvalidArgument(
            "Not find predictor_id %d and pass_name %s", predictor_id,
            pass_name));
    return PADDLE_GET_CONST(T, map[predictor_id][pass_name]);
  }

 private:
  using PassResultInfoMap =
      std::unordered_map<int, std::unordered_map<std::string, PassInfo>>;
  PassResultInfoMap map;
};

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
```
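PassResultInfoForRuntime is a process-wide registry that lets an analysis pass leave a result for the runtime to pick up later. A simplified standalone analogue of its Set/Get pattern, with std::variant in place of paddle::variant, assert in place of PADDLE_ENFORCE_EQ, and std::get in place of PADDLE_GET_CONST (the Paddle macros are assumed to check and extract equivalently):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>

// Simplified standalone analogue of PassResultInfoForRuntime.
class PassResultInfo {
 public:
  using PassInfo = std::variant<std::string,
                                std::vector<std::string>,
                                std::unordered_map<std::string, std::string>>;

  static PassResultInfo *Instance() {
    static PassResultInfo info;  // Meyers singleton, shared process-wide.
    return &info;
  }

  template <typename T>
  void Set(int predictor_id, const std::string &pass_name, T infos) {
    map_[predictor_id].emplace(pass_name, std::move(infos));
  }

  template <typename T>
  T Get(int predictor_id, const std::string &pass_name) {
    assert(map_.count(predictor_id) && map_[predictor_id].count(pass_name));
    return std::get<T>(map_[predictor_id][pass_name]);
  }

 private:
  std::unordered_map<int, std::unordered_map<std::string, PassInfo>> map_;
};

int main() {
  using ReuseTable = std::unordered_map<std::string, std::string>;
  // A pass records its result under (predictor_id, pass_name)...
  PassResultInfo::Instance()->Set(
      0, "memory_optimize_pass", ReuseTable{{"relu_out", "conv_out"}});
  // ...and the executor side fetches it later with the same key.
  auto table =
      PassResultInfo::Instance()->Get<ReuseTable>(0, "memory_optimize_pass");
  assert(table.at("relu_out") == "conv_out");
}
```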
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
```diff
@@ -19,6 +19,7 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
```
```diff
@@ -310,7 +311,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   // mapping table.
   if (!argument->enable_memory_optim()) return;
   // Because of pass is a singleton, graph can not be member
   // variables, otherwise, errors will be caused under multithreading
   // conditions.
   auto graph = argument->main_graph_ptr();
```
```diff
@@ -323,7 +324,11 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   CollectLifeCycle(graph, &lifecycles, sort_kind);
   CollectVarMemorySize(graph, &space_table);
   MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
-  UpdateOpDescsByReuse(graph, node2cluster, sort_kind);
+
+  auto* pass_res_info = PassResultInfoForRuntime::Instance();
+  pass_res_info->Set(
+      argument->root_predictor_id(), "memory_optimize_pass", node2cluster);
   return;
 }
```
paddle/fluid/inference/api/analysis_predictor.cc
```diff
@@ -38,6 +38,7 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
```
```diff
@@ -262,6 +263,10 @@ bool AnalysisPredictor::Init(
                   "generated.";
   }

+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
```
```diff
@@ -615,6 +620,15 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(
       sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);

+  if (config_.enable_memory_optim_) {
+    auto *pass_res_info =
+        inference::analysis::PassResultInfoForRuntime::Instance();
+    auto reuse_table =
+        pass_res_info->Get<std::unordered_map<std::string, std::string>>(
+            root_predictor_id_, "memory_optimize_pass");
+    executor_->MakeReusePlan(reuse_table);
+  }
+
   PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                           platform::errors::PreconditionNotMet(
                               "The sub_scope should not be nullptr."));
```
```diff
@@ -1079,6 +1093,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
+  argument_.SetRootPredictorID(root_predictor_id_);
   argument_.SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
```
```diff
@@ -2114,6 +2129,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
```
```diff
@@ -2175,12 +2191,6 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
 }

 void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
-  if (config_.enable_memory_optim()) {
-    LOG(WARNING) << "If you want to run output hook function, you should "
-                    "use config.EnableMemoryOptim(false) to turn off memory "
-                    "reuse!";
-    return;
-  }
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
```
paddle/fluid/inference/api/analysis_predictor.h
```diff
@@ -102,7 +102,6 @@ class AnalysisPredictor : public PaddlePredictor {
   explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
     if (config_.shape_range_info_collected()) {
       config_.SwitchIrOptim(false);
-      config_.EnableMemoryOptim(false);
     }
     predictor_id_ = inference::GetUniqueId();
   }
```
```diff
@@ -518,6 +517,7 @@ class AnalysisPredictor : public PaddlePredictor {
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int>>> batch_var_shapes_;
   int predictor_id_;
+  int root_predictor_id_{-1};

  private:
   std::vector<Exp_OutputHookFunc> hookfuncs_;
```
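Taken together: root_predictor_id_ defaults to -1, is set to the predictor's own id in Init() when the predictor is not a clone, and is copied in Clone(), so every cloned predictor looks up the reuse table that memory_optimize_pass stored under its root predictor's id.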