Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
594dc4d8
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
594dc4d8
编写于
1月 10, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
partial gc 1st version
test=develop
上级
f3a13512
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
158 addition
and
16 deletion
+158
-16
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+1
-1
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+13
-1
paddle/fluid/framework/details/eager_deletion_pass.cc
paddle/fluid/framework/details/eager_deletion_pass.cc
+122
-3
paddle/fluid/framework/details/reference_count_pass.cc
paddle/fluid/framework/details/reference_count_pass.cc
+0
-9
paddle/fluid/framework/details/reference_count_pass_helper.cc
...le/fluid/framework/details/reference_count_pass_helper.cc
+14
-1
paddle/fluid/framework/details/reference_count_pass_helper.h
paddle/fluid/framework/details/reference_count_pass_helper.h
+7
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-0
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
594dc4d8
...
...
@@ -54,7 +54,7 @@ cc_library(memory_optimize_pass SRCS analysis_var_pass.cc memory_reuse_types.cc
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
cc_library
(
memory_early_delete_pass SRCS memory_early_delete_pass.cc DEPS memory_optimize_pass computation_op_handle scale_loss_grad_op_handle rpc_op_handle
all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle graph graph_helper pass
)
cc_library
(
reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle
)
cc_library
(
reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle
proto_desc var_handle
)
cc_library
(
eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper
)
cc_library
(
eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass
)
cc_library
(
reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper
)
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
594dc4d8
...
...
@@ -16,6 +16,7 @@
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
...
...
@@ -45,6 +46,7 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
}
}
#endif
PADDLE_ENFORCE
(
!
var_names_
.
empty
(),
"Var names cannot be empty"
);
}
EagerDeletionOpHandle
::~
EagerDeletionOpHandle
()
{
...
...
@@ -60,7 +62,13 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
std
::
string
EagerDeletionOpHandle
::
Name
()
const
{
return
"eager_deletion"
;
}
void
EagerDeletionOpHandle
::
RunImpl
()
{
auto
*
exec_scope
=
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
#ifdef PADDLE_WITH_CUDA
platform
::
RecordEvent
record_event
(
Name
(),
dev_ctx_
);
#else
platform
::
RecordEvent
record_event
(
Name
(),
nullptr
);
#endif
Scope
*
exec_scope
=
nullptr
;
std
::
deque
<
std
::
shared_ptr
<
memory
::
Allocation
>>
garbages
;
for
(
auto
&
name
:
var_names_
)
{
auto
it
=
ref_cnts_
->
find
(
name
);
...
...
@@ -69,6 +77,10 @@ void EagerDeletionOpHandle::RunImpl() {
continue
;
}
if
(
!
exec_scope
)
{
exec_scope
=
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
}
auto
*
var
=
exec_scope
->
FindVar
(
name
);
if
(
var
==
nullptr
)
{
continue
;
...
...
paddle/fluid/framework/details/eager_deletion_pass.cc
浏览文件 @
594dc4d8
...
...
@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <functional>
#include <queue>
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
...
...
@@ -22,10 +25,120 @@
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
DEFINE_double
(
fraction_of_eager_deletion
,
1.0
,
"Fraction of eager deletion"
);
DEFINE_bool
(
eager_delete_tensor_only
,
false
,
""
);
namespace
paddle
{
namespace
framework
{
namespace
details
{
namespace
{
// NOLINT
using
OpToVarNameSetMap
=
std
::
unordered_map
<
ComputationOpHandle
*
,
std
::
unordered_set
<
std
::
string
>>
;
}
// NOLINT
static
bool
IsLoDTensor
(
VarDesc
*
var
)
{
return
var
->
Proto
()
->
type
().
type
()
==
proto
::
VarType
::
LOD_TENSOR
;
}
static
int64_t
GetNumel
(
const
GraphVars
&
vars
,
const
std
::
string
&
var_name
,
size_t
scope_idx
)
{
auto
*
var_desc
=
TryGetLatestVarDesc
(
vars
[
scope_idx
].
at
(
var_name
));
PADDLE_ENFORCE
(
IsLoDTensor
(
var_desc
));
auto
dims
=
var_desc
->
GetShape
();
return
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
static_cast
<
int64_t
>
(
1
),
std
::
multiplies
<
int64_t
>
());
}
static
void
SplitIntoLoDTensorAndNonLoDTensorVars
(
const
OpToVarNameSetMap
&
m
,
const
GraphVars
&
vars
,
OpToVarNameSetMap
*
lod_tensors
,
OpToVarNameSetMap
*
other_vars
)
{
lod_tensors
->
clear
();
other_vars
->
clear
();
for
(
auto
&
op_vars_pair
:
m
)
{
for
(
auto
&
var_name
:
op_vars_pair
.
second
)
{
auto
*
var_desc
=
TryGetLatestVarDesc
(
vars
[
op_vars_pair
.
first
->
GetScopeIdx
()].
at
(
var_name
));
if
(
IsLoDTensor
(
var_desc
))
{
(
*
lod_tensors
)[
op_vars_pair
.
first
].
insert
(
var_name
);
}
else
{
(
*
other_vars
)[
op_vars_pair
.
first
].
insert
(
var_name
);
}
}
}
}
static
OpToVarNameSetMap
ShrinkGCVars
(
const
OpToVarNameSetMap
&
m
,
const
GraphVars
&
vars
,
double
fraction_of_memory_size
,
bool
delete_lod_tensor_only
=
false
)
{
// Do not perform gc
if
(
fraction_of_memory_size
<=
0.0
)
return
{};
// Perform complete gc
if
(
fraction_of_memory_size
>=
1.0
)
{
if
(
delete_lod_tensor_only
)
{
OpToVarNameSetMap
lod_tensors
,
other_vars
;
SplitIntoLoDTensorAndNonLoDTensorVars
(
m
,
vars
,
&
lod_tensors
,
&
other_vars
);
return
lod_tensors
;
}
else
{
return
m
;
}
}
// Perform partial gc
OpToVarNameSetMap
lod_tensors
,
other_vars
;
SplitIntoLoDTensorAndNonLoDTensorVars
(
m
,
vars
,
&
lod_tensors
,
&
other_vars
);
using
TupleType
=
std
::
tuple
<
std
::
string
,
ComputationOpHandle
*
,
int64_t
>
;
std
::
unordered_map
<
size_t
,
std
::
vector
<
TupleType
>>
place_to_vars
;
std
::
unordered_map
<
size_t
,
int64_t
>
total_memory_size
;
for
(
auto
&
op_vars_pair
:
lod_tensors
)
{
auto
scope_idx
=
op_vars_pair
.
first
->
GetScopeIdx
();
int64_t
size
=
0
;
for
(
auto
&
var_name
:
op_vars_pair
.
second
)
{
auto
var_size
=
GetNumel
(
vars
,
var_name
,
scope_idx
);
size
+=
std
::
abs
(
var_size
);
place_to_vars
[
scope_idx
].
emplace_back
(
var_name
,
op_vars_pair
.
first
,
var_size
);
}
total_memory_size
.
emplace
(
scope_idx
,
size
);
}
for
(
auto
&
pair
:
place_to_vars
)
{
std
::
sort
(
pair
.
second
.
begin
(),
pair
.
second
.
end
(),
[](
const
TupleType
&
t1
,
const
TupleType
&
t2
)
{
return
std
::
abs
(
std
::
get
<
2
>
(
t1
))
>
std
::
abs
(
std
::
get
<
2
>
(
t2
));
});
}
OpToVarNameSetMap
ret
;
for
(
auto
&
pair
:
place_to_vars
)
{
auto
desired_delete_size
=
static_cast
<
int64_t
>
(
fraction_of_memory_size
*
total_memory_size
.
at
(
pair
.
first
));
int64_t
cur_size
=
0
;
for
(
size_t
i
=
0
;
i
<
pair
.
second
.
size
()
&&
cur_size
<
desired_delete_size
;
++
i
)
{
auto
&
var_name
=
std
::
get
<
0
>
(
pair
.
second
[
i
]);
auto
*
op
=
std
::
get
<
1
>
(
pair
.
second
[
i
]);
cur_size
+=
std
::
get
<
2
>
(
pair
.
second
[
i
]);
ret
[
op
].
insert
(
var_name
);
}
}
if
(
!
delete_lod_tensor_only
)
{
for
(
auto
&
op_vars_pair
:
other_vars
)
{
for
(
auto
&
var_name
:
op_vars_pair
.
second
)
{
ret
[
op_vars_pair
.
first
].
insert
(
var_name
);
}
}
}
return
ret
;
}
std
::
unique_ptr
<
ir
::
Graph
>
EagerDeletionPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
...
...
@@ -43,9 +156,7 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
// a reverse map of last_live_ops
// i.e., last op --> variable names which can be deleted.
std
::
unordered_map
<
ComputationOpHandle
*
,
std
::
unordered_set
<
std
::
string
>>
op_vars_map
;
OpToVarNameSetMap
op_vars_map
;
for
(
auto
&
var_ops_map
:
last_live_ops
)
{
for
(
auto
&
var_ops_pair
:
var_ops_map
)
{
const
std
::
string
&
var_name
=
var_ops_pair
.
first
;
...
...
@@ -55,6 +166,10 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
}
}
op_vars_map
=
ShrinkGCVars
(
op_vars_map
,
vars
,
FLAGS_fraction_of_eager_deletion
,
FLAGS_eager_delete_tensor_only
);
for
(
auto
&
pair
:
op_vars_map
)
{
auto
*
op
=
pair
.
first
;
auto
&
var_names
=
pair
.
second
;
...
...
@@ -85,6 +200,10 @@ std::unique_ptr<ir::Graph> EagerDeletionPass::ApplyImpl(
eager_deletion_op
->
AddOutput
(
dummy_leaf
);
}
VLOG
(
10
)
<<
"FLAGS_fraction_of_eager_deletion = "
<<
FLAGS_fraction_of_eager_deletion
;
VLOG
(
10
)
<<
"FLAGS_eager_delete_tensor_only = "
<<
FLAGS_eager_delete_tensor_only
;
VLOG
(
10
)
<<
"Create "
<<
op_vars_map
.
size
()
<<
" EagerDeletionOpHandle(s)"
;
return
graph
;
}
...
...
paddle/fluid/framework/details/reference_count_pass.cc
浏览文件 @
594dc4d8
...
...
@@ -189,15 +189,6 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
return
shrink_func
(
computation_op
);
}
static
VarDesc
*
TryGetLatestVarDesc
(
const
std
::
vector
<
VarHandle
*>
&
vars
)
{
VarDesc
*
var_desc
=
nullptr
;
std
::
find_if
(
vars
.
rbegin
(),
vars
.
rend
(),
[
&
](
VarHandle
*
var_handle
)
->
bool
{
var_desc
=
var_handle
->
Node
()
->
Var
();
return
var_desc
!=
nullptr
;
});
return
var_desc
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ReferenceCountPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
&
ref_cnts
=
Get
<
std
::
vector
<
ReferenceCountMap
>>
(
kGlobalReferenceCount
);
...
...
paddle/fluid/framework/details/reference_count_pass_helper.cc
浏览文件 @
594dc4d8
...
...
@@ -13,9 +13,22 @@
// limitations under the License.
#include "paddle/fluid/framework/details/reference_count_pass_helper.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/var_desc.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{}
// namespace details
namespace
details
{
VarDesc
*
TryGetLatestVarDesc
(
const
std
::
vector
<
VarHandle
*>
&
vars
)
{
VarDesc
*
var_desc
=
nullptr
;
std
::
find_if
(
vars
.
rbegin
(),
vars
.
rend
(),
[
&
](
VarHandle
*
var_handle
)
->
bool
{
var_desc
=
var_handle
->
Node
()
->
Var
();
return
var_desc
!=
nullptr
;
});
return
var_desc
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/reference_count_pass_helper.h
浏览文件 @
594dc4d8
...
...
@@ -25,6 +25,10 @@
namespace
paddle
{
namespace
framework
{
class
VarDesc
;
class
VarHandle
;
namespace
details
{
class
ComputationOpHandle
;
...
...
@@ -43,9 +47,11 @@ const char kGarbageCollector[] = "garbage_collector";
const
char
kAllPlaces
[]
=
"all_places"
;
using
LastLiveOpsOfVars
=
std
::
unordered_map
<
std
::
string
,
std
::
unordered_set
<
ComputationOpHandle
*>>
;
std
::
unordered_map
<
std
::
string
,
std
::
unordered_set
<
ComputationOpHandle
*>>
;
const
char
kLastLiveOpsOfVars
[]
=
"last_live_ops_of_var"
;
VarDesc
*
TryGetLatestVarDesc
(
const
std
::
vector
<
VarHandle
*>
&
vars
);
}
// namespace details
}
// namespace framework
}
// namespace paddle
python/paddle/fluid/__init__.py
浏览文件 @
594dc4d8
...
...
@@ -127,6 +127,7 @@ def __bootstrap__():
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'fraction_of_eager_deletion'
,
'eager_delete_tensor_only'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'enable_parallel_graph'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录