Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
6b7b7677
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6b7b7677
编写于
12月 07, 2021
作者:
J
JingZhuangzhuang
提交者:
GitHub
12月 07, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
multithread memory optimize error fix (#37894)
* multithread_memory_optimize
上级
e3cca8ac
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
19 addition
and
18 deletion
+19
-18
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
...e/fluid/inference/analysis/passes/memory_optimize_pass.cc
+16
-13
paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
...le/fluid/inference/analysis/passes/memory_optimize_pass.h
+3
-5
未找到文件。
paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
浏览文件 @
6b7b7677
...
@@ -52,11 +52,11 @@ typedef struct {
...
@@ -52,11 +52,11 @@ typedef struct {
// The traversal order also affect the lifecycles, so different sort_kind is
// The traversal order also affect the lifecycles, so different sort_kind is
// used.
// used.
void
MemoryOptimizePass
::
CollectLifeCycle
(
void
MemoryOptimizePass
::
CollectLifeCycle
(
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>*
lifecycles
,
Graph
*
graph
,
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>*
lifecycles
,
int
sort_kind
)
const
{
int
sort_kind
)
const
{
max_lifecycle_
=
0
;
int
max_lifecycle
=
0
;
for
(
auto
*
op_node
:
framework
::
ir
::
TopologyVarientSort
(
for
(
auto
*
op_node
:
framework
::
ir
::
TopologyVarientSort
(
*
graph
_
,
static_cast
<
framework
::
ir
::
SortKind
>
(
sort_kind
)))
{
*
graph
,
static_cast
<
framework
::
ir
::
SortKind
>
(
sort_kind
)))
{
if
(
!
op_node
->
IsOp
())
continue
;
if
(
!
op_node
->
IsOp
())
continue
;
auto
reads
=
op_node
->
inputs
;
auto
reads
=
op_node
->
inputs
;
auto
writes
=
op_node
->
outputs
;
auto
writes
=
op_node
->
outputs
;
...
@@ -77,20 +77,20 @@ void MemoryOptimizePass::CollectLifeCycle(
...
@@ -77,20 +77,20 @@ void MemoryOptimizePass::CollectLifeCycle(
if
(
node
->
Var
()
->
Persistable
())
continue
;
if
(
node
->
Var
()
->
Persistable
())
continue
;
std
::
string
var
=
node
->
Name
();
std
::
string
var
=
node
->
Name
();
if
(
!
lifecycles
->
count
(
var
))
{
if
(
!
lifecycles
->
count
(
var
))
{
(
*
lifecycles
)[
var
]
=
std
::
make_pair
(
max_lifecycle
_
,
max_lifecycle_
);
(
*
lifecycles
)[
var
]
=
std
::
make_pair
(
max_lifecycle
,
max_lifecycle
);
}
else
{
}
else
{
(
*
lifecycles
)[
var
].
second
=
(
*
lifecycles
)[
var
].
second
=
std
::
max
(
max_lifecycle
_
,
lifecycles
->
at
(
var
).
second
);
// max()
std
::
max
(
max_lifecycle
,
lifecycles
->
at
(
var
).
second
);
// max()
}
}
}
}
}
}
++
max_lifecycle
_
;
++
max_lifecycle
;
}
}
}
}
void
MemoryOptimizePass
::
CollectVarMemorySize
(
void
MemoryOptimizePass
::
CollectVarMemorySize
(
space_table_t
*
space_table
)
const
{
Graph
*
graph
,
space_table_t
*
space_table
)
const
{
const
int
fake_batch_size
=
1
;
const
int
fake_batch_size
=
1
;
auto
valid_var
=
[
&
](
framework
::
ir
::
Node
*
node
)
->
bool
{
auto
valid_var
=
[
&
](
framework
::
ir
::
Node
*
node
)
->
bool
{
...
@@ -130,7 +130,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
...
@@ -130,7 +130,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
// although it's not always the case. so black list is the best compromise
// although it's not always the case. so black list is the best compromise
// between performance and underlying principle.
// between performance and underlying principle.
std
::
unordered_set
<
std
::
string
>
black_list
;
std
::
unordered_set
<
std
::
string
>
black_list
;
for
(
auto
*
node
:
graph
_
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
if
(
node
->
IsVar
()
&&
node
->
Var
()
->
GetType
()
==
node
->
Var
()
->
GetType
()
==
framework
::
proto
::
VarType
::
Type
::
VarType_Type_LOD_TENSOR
)
{
framework
::
proto
::
VarType
::
Type
::
VarType_Type_LOD_TENSOR
)
{
...
@@ -141,7 +141,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
...
@@ -141,7 +141,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
}
}
// Collect tensors from graph.
// Collect tensors from graph.
for
(
auto
*
node
:
graph
_
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
if
(
node
->
IsVar
()
&&
node
->
Var
()
->
GetType
()
==
node
->
Var
()
->
GetType
()
==
framework
::
proto
::
VarType
::
Type
::
VarType_Type_LOD_TENSOR
&&
framework
::
proto
::
VarType
::
Type
::
VarType_Type_LOD_TENSOR
&&
...
@@ -304,7 +304,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
...
@@ -304,7 +304,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
// 3. Perform reuse plan: Replace all var's name in the model according to the
// 3. Perform reuse plan: Replace all var's name in the model according to the
// mapping table.
// mapping table.
if
(
!
argument
->
enable_memory_optim
())
return
;
if
(
!
argument
->
enable_memory_optim
())
return
;
graph_
=
argument
->
main_graph_ptr
();
// Because of pass is a singleton, graph can not be member
// variables,otherwise,errors will be caused under multithreading
// conditions.
auto
graph
=
argument
->
main_graph_ptr
();
int
sort_kind
=
0
;
int
sort_kind
=
0
;
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>
lifecycles
;
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>
lifecycles
;
...
@@ -312,10 +315,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
...
@@ -312,10 +315,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
std
::
unordered_map
<
std
::
string
,
std
::
string
>
node2cluster
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
node2cluster
;
std
::
unordered_map
<
std
::
string
,
int
>
cluster_size
;
std
::
unordered_map
<
std
::
string
,
int
>
cluster_size
;
CollectLifeCycle
(
&
lifecycles
,
sort_kind
);
CollectLifeCycle
(
graph
,
&
lifecycles
,
sort_kind
);
CollectVarMemorySize
(
&
space_table
);
CollectVarMemorySize
(
graph
,
&
space_table
);
MakeSimpleReusePlan
(
lifecycles
,
space_table
,
&
node2cluster
,
&
cluster_size
);
MakeSimpleReusePlan
(
lifecycles
,
space_table
,
&
node2cluster
,
&
cluster_size
);
UpdateOpDescsByReuse
(
graph
_
,
node2cluster
,
sort_kind
);
UpdateOpDescsByReuse
(
graph
,
node2cluster
,
sort_kind
);
return
;
return
;
}
}
...
...
paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
浏览文件 @
6b7b7677
...
@@ -57,17 +57,15 @@ class MemoryOptimizePass : public AnalysisPass {
...
@@ -57,17 +57,15 @@ class MemoryOptimizePass : public AnalysisPass {
private:
private:
void
CollectLifeCycle
(
void
CollectLifeCycle
(
framework
::
ir
::
Graph
*
graph
,
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>
*
lifecycles
,
std
::
unordered_map
<
std
::
string
,
lifecycle_t
>
*
lifecycles
,
int
sort_kind
)
const
;
int
sort_kind
)
const
;
void
CollectVarMemorySize
(
space_table_t
*
space_table
)
const
;
void
CollectVarMemorySize
(
framework
::
ir
::
Graph
*
graph
,
space_table_t
*
space_table
)
const
;
public:
public:
std
::
string
repr
()
const
override
;
std
::
string
repr
()
const
override
;
private:
mutable
framework
::
ir
::
Graph
*
graph_
{
nullptr
};
mutable
int
max_lifecycle_
{
-
1
};
};
};
}
// namespace analysis
}
// namespace analysis
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录