Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9268f392
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9268f392
编写于
2月 08, 2023
作者:
L
LiYuRio
提交者:
GitHub
2月 08, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize gc in executor (#50301)
上级
80dc81c5
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
125 addition
and
54 deletion
+125
-54
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
+125
-54
未找到文件。
paddle/fluid/distributed/fleet_executor/fleet_executor.cc
浏览文件 @
9268f392
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h"
#include <algorithm>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/fleet_executor/global.h"
...
...
@@ -53,40 +54,40 @@ FleetExecutor::~FleetExecutor() {
}
}
void
FleetExecutor
::
Init
(
const
std
::
string
&
carrier_id
,
const
framework
::
ProgramDesc
&
program_desc
,
framework
::
Scope
*
scope
,
const
platform
::
Place
&
place
,
int64_t
num_micro_batches
,
const
std
::
vector
<
TaskNode
*>&
task_nodes
,
const
std
::
unordered_map
<
int64_t
,
int64_t
>&
task_id_to_rank
,
const
std
::
vector
<
std
::
string
>&
inference_root_scope_vars
,
const
std
::
vector
<
framework
::
Scope
*>&
micro_scope_list
)
{
PADDLE_ENFORCE_GT
(
task_nodes
.
size
(),
0
,
platform
::
errors
::
InvalidArgument
(
"Fleet executor is inited with empty task node"
));
// TODO(fleet_exe devs): the unused_vars should be got from run time graph
std
::
vector
<
std
::
unique_ptr
<
framework
::
OperatorBase
>>
ops
;
for
(
const
auto
&
desc
:
program_desc
.
Block
(
0
).
AllOps
())
{
ops
.
emplace_back
(
framework
::
OpRegistry
::
CreateOp
(
*
desc
));
namespace
{
void
GetSubBlockTask
(
const
std
::
vector
<
TaskNode
*>&
tasks
,
TaskNode
*
cur_task
,
std
::
set
<
TaskNode
*>*
sub_block_task
)
{
auto
&
downstream
=
cur_task
->
downstream
();
auto
&
id_to_dep_type
=
cur_task
->
id_to_dep_type
();
for
(
auto
&
down
:
downstream
)
{
int64_t
task_id
=
down
.
first
;
if
(
id_to_dep_type
.
at
(
task_id
)
==
DependType
::
NORMAL
)
{
for
(
const
auto
&
task
:
tasks
)
{
if
(
task
->
task_id
()
==
task_id
)
{
sub_block_task
->
emplace
(
task
);
GetSubBlockTask
(
tasks
,
task
,
sub_block_task
);
}
auto
unused_vars
=
framework
::
GetUnusedVars
(
program_desc
.
Block
(
0
),
ops
,
{});
}
}
}
}
// NOTE: For inference, the vars in inference_root_scope_vars
// shouldn't be deleted during inf, for that they may be the result of the
// inf. If they are GCed, it will cause error during ZeroCopy the result.
void
PreventVarsDelete
(
std
::
unordered_map
<
const
framework
::
OperatorBase
*
,
std
::
vector
<
std
::
string
>>*
unused_vars
,
const
std
::
vector
<
std
::
string
>&
vars_not_gc
)
{
std
::
vector
<
const
framework
::
OperatorBase
*>
changed_ops
;
for
(
auto
pair
:
unused_vars
)
{
for
(
const
auto
&
pair
:
*
unused_vars
)
{
const
framework
::
OperatorBase
*
op
=
pair
.
first
;
std
::
vector
<
std
::
string
>
unused
=
pair
.
second
;
for
(
auto
name
:
inference_root_scope_vars
)
{
auto
iter
=
std
::
find
(
unused
.
begin
(),
unused
.
end
(),
name
);
if
(
iter
!=
unused
.
end
())
{
std
::
vector
<
std
::
string
>
cur_
unused
=
pair
.
second
;
for
(
auto
name
:
vars_not_gc
)
{
auto
iter
=
std
::
find
(
cur_unused
.
begin
(),
cur_
unused
.
end
(),
name
);
if
(
iter
!=
cur_
unused
.
end
())
{
VLOG
(
3
)
<<
"Removing var: ["
<<
name
<<
"] from the unused vars list of op: ["
<<
op
->
Type
()
<<
"]"
;
unused
.
erase
(
iter
);
cur_
unused
.
erase
(
iter
);
if
(
std
::
find
(
changed_ops
.
begin
(),
changed_ops
.
end
(),
op
)
==
changed_ops
.
end
())
{
// record the op whose unused vars have been updated
...
...
@@ -95,27 +96,29 @@ void FleetExecutor::Init(
}
}
// update the unused vars list in the map
unused_vars
[
op
]
=
unused
;
unused_vars
->
at
(
op
)
=
cur_
unused
;
}
for
(
auto
op
:
changed_ops
)
{
auto
iter
=
unused_vars
.
find
(
op
);
const
auto
&
iter
=
unused_vars
->
find
(
op
);
if
(
iter
->
second
.
empty
())
{
// remove those ops in the map that have empty unused vars list
VLOG
(
3
)
<<
"Removing op: ["
<<
op
->
Type
()
<<
"] from unused_vars map."
;
unused_vars
.
erase
(
iter
);
unused_vars
->
erase
(
iter
);
}
}
runtime_graph_
=
std
::
make_shared
<
RuntimeGraph
>
();
std
::
unordered_map
<
int64_t
,
TaskNode
*>
interceptor_id_to_task
;
for
(
auto
task_node
:
task_nodes
)
{
task_node
->
SetUnusedVars
(
unused_vars
);
if
(
task_node
->
type
()
==
"Cond"
)
{
}
std
::
vector
<
std
::
string
>
GetUnusedVarsAfterWhile
(
const
framework
::
ProgramDesc
&
program_desc
,
TaskNode
*
cond_task
,
const
std
::
vector
<
std
::
string
>
vars_not_gc
)
{
std
::
vector
<
std
::
string
>
while_block_vars
;
VLOG
(
3
)
<<
"Vars in while sub block:"
;
for
(
auto
&
var
:
program_desc
.
Block
(
1
).
AllVars
())
{
VLOG
(
3
)
<<
var
->
Name
();
while_block_vars
.
emplace_back
(
var
->
Name
());
std
::
vector
<
std
::
unique_ptr
<
framework
::
OperatorBase
>>
ops
;
for
(
const
auto
&
desc
:
program_desc
.
Block
(
0
).
AllOps
())
{
ops
.
emplace_back
(
framework
::
OpRegistry
::
CreateOp
(
*
desc
));
}
auto
unused_vars
=
framework
::
GetUnusedVars
(
program_desc
.
Block
(
0
),
ops
,
{});
PreventVarsDelete
(
&
unused_vars
,
vars_not_gc
);
for
(
const
auto
&
pair
:
unused_vars
)
{
if
(
pair
.
first
->
Type
()
==
"while"
)
{
for
(
const
auto
&
var_name
:
pair
.
second
)
{
...
...
@@ -123,20 +126,88 @@ void FleetExecutor::Init(
}
}
}
VLOG
(
3
)
<<
"Vars below will be removed after while:"
;
for
(
const
auto
&
name
:
while_block_vars
)
{
VLOG
(
3
)
<<
name
;
return
while_block_vars
;
}
}
// namespace
void
FleetExecutor
::
Init
(
const
std
::
string
&
carrier_id
,
const
framework
::
ProgramDesc
&
program_desc
,
framework
::
Scope
*
scope
,
const
platform
::
Place
&
place
,
int64_t
num_micro_batches
,
const
std
::
vector
<
TaskNode
*>&
task_nodes
,
const
std
::
unordered_map
<
int64_t
,
int64_t
>&
task_id_to_rank
,
const
std
::
vector
<
std
::
string
>&
inference_root_scope_vars
,
const
std
::
vector
<
framework
::
Scope
*>&
micro_scope_list
)
{
PADDLE_ENFORCE_GT
(
task_nodes
.
size
(),
0
,
platform
::
errors
::
InvalidArgument
(
"Fleet executor is inited with empty task node"
));
// Set the unused var after running while op
std
::
set
<
TaskNode
*>
sub_block_tasks
;
std
::
vector
<
std
::
string
>
while_block_vars
;
for
(
const
auto
&
task_node
:
task_nodes
)
{
if
(
task_node
->
type
()
==
"Cond"
)
{
GetSubBlockTask
(
task_nodes
,
task_node
,
&
sub_block_tasks
);
while_block_vars
=
GetUnusedVarsAfterWhile
(
program_desc
,
task_node
,
inference_root_scope_vars
);
VLOG
(
3
)
<<
"Vars will be gced after while op"
;
for
(
auto
var
:
while_block_vars
)
{
VLOG
(
3
)
<<
var
;
}
task_node
->
SetWhileBlockVars
(
while_block_vars
);
}
}
std
::
vector
<
framework
::
OperatorBase
*>
sub_block_ops
;
for
(
const
auto
&
task_node
:
sub_block_tasks
)
{
for
(
const
auto
&
op
:
task_node
->
ops
())
{
sub_block_ops
.
emplace_back
(
op
);
}
}
// Analyse the unused vars in block 0. The operators in block 1
// should be passed in first for prevent vars been released but removed soon.
// Since the unused vars in block 1 need to analyse separately.
std
::
vector
<
std
::
unique_ptr
<
framework
::
OperatorBase
>>
ops
;
for
(
const
auto
&
task_node
:
task_nodes
)
{
for
(
const
auto
&
op
:
task_node
->
ops
())
{
ops
.
emplace_back
(
std
::
unique_ptr
<
framework
::
OperatorBase
>
(
op
));
}
}
auto
global_unused_vars
=
framework
::
GetUnusedVars
(
program_desc
.
Block
(
0
),
ops
,
{});
// Analyse the unused vars in block 1.
std
::
unordered_map
<
const
framework
::
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
sub_unused_vars
;
if
(
program_desc
.
Size
()
>
1
)
{
sub_unused_vars
=
framework
::
GetUnusedVars
(
program_desc
.
Block
(
1
),
ops
,
{});
PreventVarsDelete
(
&
sub_unused_vars
,
while_block_vars
);
}
for
(
auto
&
unique_op
:
ops
)
{
unique_op
.
release
();
}
// NOTE: For inference, the vars in inference_root_scope_vars
// shouldn't be deleted during inf, for that they may be the result of the
// inf. If they are GCed, it will cause error during ZeroCopy the result.
PreventVarsDelete
(
&
global_unused_vars
,
inference_root_scope_vars
);
runtime_graph_
=
std
::
make_shared
<
RuntimeGraph
>
();
std
::
unordered_map
<
int64_t
,
TaskNode
*>
interceptor_id_to_task
;
for
(
auto
task_node
:
task_nodes
)
{
if
(
sub_block_tasks
.
find
(
task_node
)
==
sub_block_tasks
.
end
())
{
task_node
->
SetUnusedVars
(
global_unused_vars
);
}
else
{
task_node
->
SetUnusedVars
(
sub_unused_vars
);
}
int64_t
interceptor_id
=
task_node
->
task_id
();
interceptor_id_to_task
.
emplace
(
interceptor_id
,
task_node
);
}
runtime_graph_
->
SetInterceptorIdToRank
(
task_id_to_rank
);
runtime_graph_
->
SetInterceptorIdToNode
(
interceptor_id_to_task
);
for
(
auto
&
unique_op
:
ops
)
{
unique_op
.
release
();
}
VLOG
(
5
)
<<
runtime_graph_
->
DebugString
();
Carrier
*
carrier
=
GlobalMap
<
std
::
string
,
Carrier
>::
Create
(
carrier_id
,
carrier_id
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录