Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
594dc4d8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
594dc4d8
编写于
1月 10, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
partial gc 1st version
test=develop
上级
f3a13512
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
158 addition
and
16 deletion
+158
-16
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+1
-1
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+13
-1
paddle/fluid/framework/details/eager_deletion_pass.cc
paddle/fluid/framework/details/eager_deletion_pass.cc
+122
-3
paddle/fluid/framework/details/reference_count_pass.cc
paddle/fluid/framework/details/reference_count_pass.cc
+0
-9
paddle/fluid/framework/details/reference_count_pass_helper.cc
...le/fluid/framework/details/reference_count_pass_helper.cc
+14
-1
paddle/fluid/framework/details/reference_count_pass_helper.h
paddle/fluid/framework/details/reference_count_pass_helper.h
+7
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-0
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
594dc4d8
...
...
@@ -54,7 +54,7 @@ cc_library(memory_optimize_pass SRCS analysis_var_pass.cc memory_reuse_types.cc
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
cc_library
(
memory_early_delete_pass SRCS memory_early_delete_pass.cc DEPS memory_optimize_pass computation_op_handle scale_loss_grad_op_handle rpc_op_handle
all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle graph graph_helper pass
)
cc_library
(
reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle
)
cc_library
(
reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle
proto_desc var_handle
)
cc_library
(
eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper
)
cc_library
(
eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass
)
cc_library
(
reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper
)
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
594dc4d8
...
...
@@ -16,6 +16,7 @@
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
...
...
@@ -45,6 +46,7 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
}
}
#endif
PADDLE_ENFORCE
(
!
var_names_
.
empty
(),
"Var names cannot be empty"
);
}
EagerDeletionOpHandle
::~
EagerDeletionOpHandle
()
{
...
...
@@ -60,7 +62,13 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
// Returns the fixed name of this op handle, "eager_deletion". RunImpl passes
// the same value to platform::RecordEvent as the event name.
std::string EagerDeletionOpHandle::Name() const {
  return std::string("eager_deletion");
}
void
EagerDeletionOpHandle
::
RunImpl
()
{
auto
*
exec_scope
=
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
#ifdef PADDLE_WITH_CUDA
platform
::
RecordEvent
record_event
(
Name
(),
dev_ctx_
);
#else
platform
::
RecordEvent
record_event
(
Name
(),
nullptr
);
#endif
Scope
*
exec_scope
=
nullptr
;
std
::
deque
<
std
::
shared_ptr
<
memory
::
Allocation
>>
garbages
;
for
(
auto
&
name
:
var_names_
)
{
auto
it
=
ref_cnts_
->
find
(
name
);
...
...
@@ -69,6 +77,10 @@ void EagerDeletionOpHandle::RunImpl() {
continue
;
}
if
(
!
exec_scope
)
{
exec_scope
=
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
}
auto
*
var
=
exec_scope
->
FindVar
(
name
);
if
(
var
==
nullptr
)
{
continue
;
...
...
paddle/fluid/framework/details/eager_deletion_pass.cc
浏览文件 @
594dc4d8
...
...
@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <functional>
#include <queue>
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
...
...
@@ -22,10 +25,120 @@
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
// Forwarded to ShrinkGCVars as `fraction_of_memory_size`: a value <= 0
// disables eager deletion, a value >= 1 keeps every candidate variable, and
// anything in between keeps only part of the LoDTensor candidates.
DEFINE_double(fraction_of_eager_deletion, 1.0, "Fraction of eager deletion");

// Forwarded to ShrinkGCVars as `delete_lod_tensor_only`: when true, variables
// that are not LoDTensors are dropped from the set of variables to delete.
DEFINE_bool(eager_delete_tensor_only, false,
            "Whether to eagerly delete LoDTensor variables only, leaving "
            "variables of other types untouched");
namespace
paddle
{
namespace
framework
{
namespace
details
{
namespace
{
// NOLINT
using
OpToVarNameSetMap
=
std
::
unordered_map
<
ComputationOpHandle
*
,
std
::
unordered_set
<
std
::
string
>>
;
}
// NOLINT
// Returns true iff `var` is non-null and its proto type is
// proto::VarType::LOD_TENSOR.
//
// The descriptors passed in come from TryGetLatestVarDesc, which returns
// nullptr when none of the handles carries a VarDesc. Such a variable is
// reported as "not a LoDTensor" instead of being dereferenced.
static bool IsLoDTensor(VarDesc *var) {
  return var != nullptr &&
         var->Proto()->type().type() == proto::VarType::LOD_TENSOR;
}
// Returns the product of the shape dimensions recorded in the latest VarDesc
// of `var_name` in scope `scope_idx`.
//
// vars       per-scope map from variable name to its handles.
// var_name   must be present in vars[scope_idx] (looked up with at()).
// scope_idx  index into `vars`.
//
// The variable must be a LoDTensor with a VarDesc; both are enforced.
// The product is computed on the raw shape values, so the caller in this file
// takes std::abs of the result before using it as a size.
static int64_t GetNumel(const GraphVars &vars, const std::string &var_name,
                        size_t scope_idx) {
  auto *var_desc = TryGetLatestVarDesc(vars[scope_idx].at(var_name));
  // TryGetLatestVarDesc yields nullptr when no handle carries a VarDesc; fail
  // with a message here rather than dereferencing a null pointer below.
  PADDLE_ENFORCE(var_desc != nullptr, "VarDesc of variable cannot be null");
  PADDLE_ENFORCE(IsLoDTensor(var_desc));
  auto dims = var_desc->GetShape();
  return std::accumulate(dims.begin(), dims.end(), static_cast<int64_t>(1),
                         std::multiplies<int64_t>());
}
static
void
SplitIntoLoDTensorAndNonLoDTensorVars
(
const
OpToVarNameSetMap
&
m
,
const
GraphVars
&
vars
,
OpToVarNameSetMap
*
lod_tensors
,
OpToVarNameSetMap
*
other_vars
)
{
lod_tensors
->
clear
();
other_vars
->
clear
();
for
(
auto
&
op_vars_pair
:
m
)
{
for
(
auto
&
var_name
:
op_vars_pair
.
second
)
{
auto
*
var_desc
=
TryGetLatestVarDesc
(
vars
[
op_vars_pair
.
first
->
GetScopeIdx
()].
at
(
var_name
));
if
(
IsLoDTensor
(
var_desc
))
{
(
*
lod_tensors
)[
op_vars_pair
.
first
].
insert
(
var_name
);
}
else
{
(
*
other_vars
)[
op_vars_pair
.
first
].
insert
(
var_name
);
}
}
}
}
// Selects which of the candidate variables in `m` (last op -> names of the
// variables that can be deleted after it) are actually garbage collected.
//
// m                        candidate map.
// vars                     per-scope variable handles, used to look up VarDescs.
// fraction_of_memory_size  <= 0: nothing is collected (empty map returned);
//                          >= 1: every candidate is collected;
//                          otherwise, per scope index, the largest LoDTensors
//                          are chosen until their accumulated size reaches
//                          fraction * (total LoDTensor size of that scope).
// delete_lod_tensor_only   when true, non-LoDTensor variables are never part
//                          of the result; when false they are always kept in
//                          the result (for any fraction > 0).
//
// Sizes are |GetNumel()|, i.e. element counts taken from the static shape.
// NOTE(review): the element byte width is not taken into account, so "memory
// size" here is really an element count -- confirm this is intended.
static OpToVarNameSetMap ShrinkGCVars(const OpToVarNameSetMap &m,
                                      const GraphVars &vars,
                                      double fraction_of_memory_size,
                                      bool delete_lod_tensor_only = false) {
  // Do not perform gc
  if (fraction_of_memory_size <= 0.0) return {};

  // Perform complete gc of every variable type: nothing to filter.
  if (fraction_of_memory_size >= 1.0 && !delete_lod_tensor_only) return m;

  OpToVarNameSetMap lod_tensors, other_vars;
  SplitIntoLoDTensorAndNonLoDTensorVars(m, vars, &lod_tensors, &other_vars);

  // Perform complete gc, restricted to LoDTensors.
  if (fraction_of_memory_size >= 1.0) return lod_tensors;

  // Perform partial gc
  // (variable name, last op, absolute size)
  using TupleType = std::tuple<std::string, ComputationOpHandle *, int64_t>;

  std::unordered_map<size_t, std::vector<TupleType>> place_to_vars;
  std::unordered_map<size_t, int64_t> total_memory_size;
  for (auto &op_vars_pair : lod_tensors) {
    auto *op = op_vars_pair.first;
    auto scope_idx = op->GetScopeIdx();
    for (auto &var_name : op_vars_pair.second) {
      // Store the absolute size so that the total, the sort key and the
      // running sum below all use the same non-negative quantity.
      int64_t var_size = std::abs(GetNumel(vars, var_name, scope_idx));
      // Accumulate across ALL ops of the scope. emplace() would keep only the
      // value of the first op seen for this scope index and silently ignore
      // the rest, making the per-scope total far too small.
      total_memory_size[scope_idx] += var_size;
      place_to_vars[scope_idx].emplace_back(var_name, op, var_size);
    }
  }

  // Largest variables first, so that few deletions free the most memory.
  for (auto &pair : place_to_vars) {
    std::sort(pair.second.begin(), pair.second.end(),
              [](const TupleType &t1, const TupleType &t2) {
                return std::get<2>(t1) > std::get<2>(t2);
              });
  }

  OpToVarNameSetMap ret;
  for (auto &pair : place_to_vars) {
    auto desired_delete_size = static_cast<int64_t>(
        fraction_of_memory_size * total_memory_size.at(pair.first));
    int64_t cur_size = 0;
    for (auto &candidate : pair.second) {
      if (cur_size >= desired_delete_size) break;
      cur_size += std::get<2>(candidate);
      ret[std::get<1>(candidate)].insert(std::get<0>(candidate));
    }
  }

  // Non-LoDTensor variables are not subject to the fraction: they are either
  // all kept in the result or all dropped.
  if (!delete_lod_tensor_only) {
    for (auto &op_vars_pair : other_vars) {
      ret[op_vars_pair.first].insert(op_vars_pair.second.begin(),
                                     op_vars_pair.second.end());
    }
  }

  return ret;
}
std
::
unique_ptr
<
ir
::
Graph
>
EagerDeletionPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
...
...
@@ -43,9 +156,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
// a reverse map of last_live_ops
// i.e., last op --> variable names which can be deleted.
std
::
unordered_map
<
ComputationOpHandle
*
,
std
::
unordered_set
<
std
::
string
>>
op_vars_map
;
OpToVarNameSetMap
op_vars_map
;
for
(
auto
&
var_ops_map
:
last_live_ops
)
{
for
(
auto
&
var_ops_pair
:
var_ops_map
)
{
const
std
::
string
&
var_name
=
var_ops_pair
.
first
;
...
...
@@ -55,6 +166,10 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
}
}
op_vars_map
=
ShrinkGCVars
(
op_vars_map
,
vars
,
FLAGS_fraction_of_eager_deletion
,
FLAGS_eager_delete_tensor_only
);
for
(
auto
&
pair
:
op_vars_map
)
{
auto
*
op
=
pair
.
first
;
auto
&
var_names
=
pair
.
second
;
...
...
@@ -85,6 +200,10 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
eager_deletion_op
->
AddOutput
(
dummy_leaf
);
}
VLOG
(
10
)
<<
"FLAGS_fraction_of_eager_deletion = "
<<
FLAGS_fraction_of_eager_deletion
;
VLOG
(
10
)
<<
"FLAGS_eager_delete_tensor_only = "
<<
FLAGS_eager_delete_tensor_only
;
VLOG
(
10
)
<<
"Create "
<<
op_vars_map
.
size
()
<<
" EagerDeletionOpHandle(s)"
;
return
graph
;
}
...
...
paddle/fluid/framework/details/reference_count_pass.cc
浏览文件 @
594dc4d8
...
...
@@ -189,15 +189,6 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
return
shrink_func
(
computation_op
);
}
// Scans `vars` from the last handle to the first and returns the VarDesc of
// the first handle whose node has one; returns nullptr when `vars` is empty or
// no handle carries a VarDesc.
static VarDesc *TryGetLatestVarDesc(const std::vector<VarHandle *> &vars) {
  for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
    VarDesc *desc = (*it)->Node()->Var();
    if (desc != nullptr) {
      return desc;
    }
  }
  return nullptr;
}
std
::
unique_ptr
<
ir
::
Graph
>
ReferenceCountPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
Get
<
std
::
vector
<
ReferenceCountMap
>>
(
kGlobalReferenceCount
);
...
...
paddle/fluid/framework/details/reference_count_pass_helper.cc
浏览文件 @
594dc4d8
...
...
@@ -13,9 +13,22 @@
// limitations under the License.
#include "paddle/fluid/framework/details/reference_count_pass_helper.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/var_desc.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{}
// namespace details
namespace
details
{
// Scans `vars` from the last handle to the first and returns the VarDesc of
// the first handle whose node has one; returns nullptr when `vars` is empty or
// no handle carries a VarDesc.
VarDesc *TryGetLatestVarDesc(const std::vector<VarHandle *> &vars) {
  for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
    VarDesc *desc = (*it)->Node()->Var();
    if (desc != nullptr) {
      return desc;
    }
  }
  return nullptr;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/reference_count_pass_helper.h
浏览文件 @
594dc4d8
...
...
@@ -25,6 +25,10 @@
namespace
paddle
{
namespace
framework
{
class
VarDesc
;
class
VarHandle
;
namespace
details
{
class
ComputationOpHandle
;
...
...
@@ -43,9 +47,11 @@ const char kGarbageCollector[] = "garbage_collector";
const
char
kAllPlaces
[]
=
"all_places"
;
using
LastLiveOpsOfVars
=
std
::
unordered_map
<
std
::
string
,
std
::
unordered_set
<
ComputationOpHandle
*>>
;
std
::
unordered_map
<
std
::
string
,
std
::
unordered_set
<
ComputationOpHandle
*>>
;
const
char
kLastLiveOpsOfVars
[]
=
"last_live_ops_of_var"
;
VarDesc
*
TryGetLatestVarDesc
(
const
std
::
vector
<
VarHandle
*>
&
vars
);
}
// namespace details
}
// namespace framework
}
// namespace paddle
python/paddle/fluid/__init__.py
浏览文件 @
594dc4d8
...
...
@@ -127,6 +127,7 @@ def __bootstrap__():
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'fraction_of_eager_deletion'
,
'eager_delete_tensor_only'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'enable_parallel_graph'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录