Commit 67c700b4 (unverified), authored Dec 03, 2020 by Aurelius84, committed by GitHub on Dec 03, 2020
[Dy2Stat] Add cache for Executor and Context in run_program_op (#28421)
Parent: d6753e1e

Showing 6 changed files with 227 additions and 73 deletions (+227 −73)
paddle/fluid/framework/CMakeLists.txt                        +1   −0
paddle/fluid/framework/executor_cache.cc                     +111 −0
paddle/fluid/framework/executor_cache.h                      +96  −0
paddle/fluid/operators/CMakeLists.txt                        +3   −1
paddle/fluid/operators/run_program_op.h                      +10  −64
python/paddle/fluid/tests/unittests/test_run_program_op.py   +6   −8
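At its core, the commit memoizes the expensive framework::Executor::Prepare() step behind a cache keyed on the (ProgramDesc*, is_grad) pair, so repeated forward and backward runs of the same program reuse one prepared context. Below is a minimal standalone sketch of that lookup pattern; Program, PreparedContext, Prepare, and GetOrPrepare are hypothetical stand-ins for illustration, not Paddle APIs.

#include <iostream>
#include <map>
#include <memory>
#include <utility>

struct Program {};          // hypothetical stand-in for framework::ProgramDesc
struct PreparedContext {};  // hypothetical stand-in for ExecutorPrepareContext

// The expensive step we want to pay only once per (program, direction).
std::shared_ptr<PreparedContext> Prepare(const Program *prog, bool is_grad) {
  std::cout << "preparing " << prog << " is_grad=" << is_grad << "\n";
  return std::make_shared<PreparedContext>();
}

std::shared_ptr<PreparedContext> GetOrPrepare(const Program *prog,
                                              bool is_grad) {
  using Key = std::pair<const Program *, bool>;
  static std::map<Key, std::shared_ptr<PreparedContext>> cache;
  Key key{prog, is_grad};
  auto it = cache.find(key);
  if (it != cache.end()) {
    return it->second;  // cache hit: reuse the prepared context
  }
  auto ctx = Prepare(prog, is_grad);  // cache miss: prepare once and remember
  cache.emplace(key, ctx);
  return ctx;
}

int main() {
  Program p;
  auto a = GetOrPrepare(&p, false);  // prepares the forward context
  auto b = GetOrPrepare(&p, false);  // reuses it: a == b
  auto c = GetOrPrepare(&p, true);   // backward gets its own cache entry
  std::cout << (a == b) << " " << (a == c) << "\n";  // prints "1 0"
}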
paddle/fluid/framework/CMakeLists.txt

@@ -268,6 +268,7 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS
             graph build_strategy collective_helper
             fast_threaded_ssa_graph_executor variable_helper)
 
+cc_library(executor_cache SRCS executor_cache.cc DEPS executor)
 cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS
         conditional_block_op executor)
 cc_library(prune SRCS prune.cc DEPS framework_proto boost)
paddle/fluid/framework/executor_cache.cc (new file, 0 → 100644)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/executor_cache.h"
#include <string>
#include <unordered_set>
#include <vector>
namespace paddle {
namespace framework {
namespace details {

static void AppendSkipDeletionVars(const std::vector<std::string> &append_vars,
                                   std::vector<std::string> *all_vars) {
  for (auto &var : append_vars) {
    all_vars->emplace_back(var);
  }
}

static void AppendSafeEagerDeletionSkipVars(
    const framework::ProgramDesc &program,
    std::vector<std::string> *skip_vars) {
  const framework::BlockDesc &block = program.Block(0);
  const std::vector<framework::OpDesc *> &all_ops = block.AllOps();

  std::unordered_set<std::string> grad_op_output;
  std::unordered_set<std::string> grad_op_input;
  for (const framework::OpDesc *op : all_ops) {
    int op_role = BOOST_GET_CONST(
        int, op->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
    if ((op_role & static_cast<int>(framework::OpRole::kBackward)) == 0) {
      continue;
    }

    for (const std::string &in_arg_name : op->InputArgumentNames()) {
      grad_op_input.emplace(in_arg_name);
    }
    for (const std::string &out_arg_name : op->OutputArgumentNames()) {
      grad_op_output.emplace(out_arg_name);
    }
  }

  // For the grad op input variables, if it is not output of grad_op, it may
  // be output of forward op and we should set the variables as skip_var to
  // prevent it being deleted when grad op is called multiple times.
  for (const std::string &var_name : grad_op_input) {
    if (grad_op_output.find(var_name) == grad_op_output.end()) {
      skip_vars->emplace_back(var_name);
    }
  }
}
}  // namespace details

// C++11 removes the need for manual locking. Concurrent execution shall wait if
// a static local variable is already being initialized.
// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex
ExecutorInfoCache &ExecutorInfoCache::Instance() {
  static ExecutorInfoCache g_exe_cache_info_map;
  return g_exe_cache_info_map;
}

std::shared_ptr<framework::ExecutorPrepareContext> GetExecutorInfoFromCache(
    const framework::Executor &exe, const framework::ExecutionContext &ctx,
    const std::vector<std::vector<std::string>> &ctx_output_names,
    bool is_grad) {
  auto *program = ctx.Attr<BlockDesc *>("global_block")->Program();

  auto &cached_exe_info = framework::ExecutorInfoCache::Instance();
  auto cache_key = framework::ExecutorInfoCache::KeyType(program, is_grad);

  if (!cached_exe_info.Has(cache_key)) {
    VLOG(1) << "create exe_info for program: " << program
            << " is_grad: " << is_grad;

    // skip delete vars
    std::vector<std::string> skip_vars;
    for (auto &output_names : ctx_output_names) {
      details::AppendSkipDeletionVars(output_names, &skip_vars);
    }
    if (is_grad) {
      details::AppendSafeEagerDeletionSkipVars(*program, &skip_vars);
    }
    VLOG(2) << "Prepare to skip " << skip_vars.size()
            << " var(s): " << string::join_strings(skip_vars, ' ');

    std::shared_ptr<framework::ExecutorPrepareContext> exe_ctx =
        std::move(exe.Prepare(*program, /*block_id=*/0, skip_vars));
    cached_exe_info.Insert(cache_key, exe_ctx);
    return exe_ctx;
  } else {
    VLOG(1) << "get exe_info from cache by program: " << program
            << " is_grad: " << is_grad;
    return cached_exe_info.Get(cache_key);
  }
}

}  // namespace framework
}  // namespace paddle
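A note on the thread-safety comment in Instance() above: since C++11, a function-local static is initialized exactly once, and concurrent first callers block until that initialization completes, so the singleton needs no explicit mutex. A minimal standalone illustration of that guarantee (toy Cache type, not Paddle code):

#include <iostream>
#include <thread>
#include <vector>

struct Cache {
  Cache() { std::cout << "constructed\n"; }  // runs exactly once
};

Cache &Instance() {
  // Since C++11, initialization of a local static is thread-safe:
  // concurrent first callers wait until construction finishes.
  static Cache instance;
  return instance;
}

int main() {
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back([] { Instance(); });
  }
  for (auto &t : threads) {
    t.join();
  }
  // "constructed" is printed a single time, no matter which thread won.
}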
paddle/fluid/framework/executor_cache.h (new file, 0 → 100644)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace framework {

class ExecutorInfoCache {
 public:
  /*
   * The ExecutorPrepareContext is different while running forward program and
   * backward program. We add bool value into cached key to distinguish this.
   */
  using KeyType = std::pair<const framework::ProgramDesc *, /*is_grad*/ bool>;

  struct HashPair {
    template <class T1, class T2>
    size_t operator()(const std::pair<T1, T2> &p) const noexcept {
      size_t seed = 10;
      hash_combine(&seed, p.first);
      hash_combine(&seed, p.second);
      return seed;
    }

    template <typename T>
    void hash_combine(size_t *seed, const T &val) const {
      std::hash<T> hasher;
      (*seed) ^= hasher(val) + 0x9e3779b9 + ((*seed) << 6) + ((*seed) >> 2);
    }
  };

  static ExecutorInfoCache &Instance();

  std::shared_ptr<framework::ExecutorPrepareContext> Get(
      const KeyType &key) const {
    PADDLE_ENFORCE_EQ(
        Has(key), true,
        platform::errors::NotFound(
            "(programDesc: %s, is_grad: %s) doesn't exist in ExecutorInfoCache",
            key.first, key.second));
    return info_map_.at(key);
  }

  bool Has(const KeyType &key) const {
    return info_map_.find(key) != info_map_.end();
  }

  void Insert(const KeyType &key,
              std::shared_ptr<framework::ExecutorPrepareContext> exe_ctx) {
    PADDLE_ENFORCE_NE(
        Has(key), true,
        platform::errors::NotFound(
            "(programDesc: %s, is_grad: %s) has existed in ExecutorInfoCache",
            key.first, key.second));
    info_map_.insert(std::make_pair(key, exe_ctx));
  }

 private:
  ExecutorInfoCache() = default;

  std::unordered_map<KeyType,
                     std::shared_ptr<framework::ExecutorPrepareContext>,
                     HashPair>
      info_map_;
  DISABLE_COPY_AND_ASSIGN(ExecutorInfoCache);
};

std::shared_ptr<framework::ExecutorPrepareContext> GetExecutorInfoFromCache(
    const framework::Executor &exe, const framework::ExecutionContext &ctx,
    const std::vector<std::vector<std::string>> &ctx_output_names,
    bool is_grad);

}  // namespace framework
}  // namespace paddle
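std::unordered_map has no built-in hash for std::pair keys, which is why the header defines HashPair with a boost-style hash_combine (0x9e3779b9 is the usual golden-ratio mixing constant). A self-contained sketch of the same technique, independent of Paddle:

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

// Boost-style combine: mixes a value's hash into a running seed.
template <typename T>
void HashCombine(std::size_t *seed, const T &val) {
  std::hash<T> hasher;
  *seed ^= hasher(val) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
}

struct PairHash {
  template <class T1, class T2>
  std::size_t operator()(const std::pair<T1, T2> &p) const noexcept {
    std::size_t seed = 10;
    HashCombine(&seed, p.first);
    HashCombine(&seed, p.second);
    return seed;
  }
};

int main() {
  // Same key shape as ExecutorInfoCache: (pointer, is_grad flag).
  std::unordered_map<std::pair<const void *, bool>, std::string, PairHash> m;
  int dummy = 0;
  m[{&dummy, false}] = "forward";
  m[{&dummy, true}] = "backward";
  std::cout << m.at({&dummy, true}) << "\n";  // prints "backward"
}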
paddle/fluid/operators/CMakeLists.txt

@@ -64,9 +64,11 @@ if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON)
     SET(OP_MKL_DEPS ${OP_MKL_DEPS} pyramid_hash_op)
 endif()
 
-register_operators(EXCLUDES py_func_op warpctc_op dgc_op lstm_op
+register_operators(EXCLUDES py_func_op warpctc_op dgc_op lstm_op run_program_op
     sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
 
+op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
+
 if (WITH_GPU)
     # warpctc_op needs cudnn 7 above
     if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
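Note: run_program_op appears to be excluded from the generic register_operators() call precisely so it can be declared via op_library() with an explicit dependency on the new executor_cache library.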
paddle/fluid/operators/run_program_op.h

@@ -16,12 +16,15 @@ limitations under the License. */
 #include <algorithm>
 #include <iterator>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 
 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/executor_cache.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -156,46 +159,6 @@ static void ShareVarsFromScope(const std::vector<Variable *> &vars,
   }
 }
 
-static void AppendSkipDeletionVars(const std::vector<std::string> &append_vars,
-                                   std::vector<std::string> *all_vars) {
-  for (auto &var : append_vars) {
-    all_vars->emplace_back(var);
-  }
-}
-
-static void AppendSafeEagerDeletionSkipVars(
-    const framework::ProgramDesc &program,
-    std::vector<std::string> *skip_vars) {
-  const framework::BlockDesc &block = program.Block(0);
-  const std::vector<framework::OpDesc *> &all_ops = block.AllOps();
-
-  std::unordered_set<std::string> grad_op_output;
-  std::unordered_set<std::string> grad_op_input;
-  for (const framework::OpDesc *op : all_ops) {
-    int op_role = BOOST_GET_CONST(
-        int, op->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
-    if ((op_role & static_cast<int>(framework::OpRole::kBackward)) == 0) {
-      continue;
-    }
-
-    for (const std::string &in_arg_name : op->InputArgumentNames()) {
-      grad_op_input.emplace(in_arg_name);
-    }
-    for (const std::string &out_arg_name : op->OutputArgumentNames()) {
-      grad_op_output.emplace(out_arg_name);
-    }
-  }
-
-  // For the grad op input variables, if it is not output of grad_op, it may
-  // be output of forward op and we should set the variables as skip_var to
-  // prevent it being deleted when grad op is called multiple times.
-  for (const std::string &var_name : grad_op_input) {
-    if (grad_op_output.find(var_name) == grad_op_output.end()) {
-      skip_vars->emplace_back(var_name);
-    }
-  }
-}
 }  // namespace details
 
 template <typename DeviceContext, typename T>
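The two helpers deleted here now live in paddle/fluid/framework/executor_cache.cc (shown above). Their core is a set difference: any variable a backward op reads but no backward op writes must have been produced by the forward pass, so it is added to skip_vars to protect it from eager deletion between repeated backward runs. A compilable toy of just that computation, with invented variable names:

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  // Hypothetical inputs/outputs collected from the backward ops of a program.
  std::unordered_set<std::string> grad_op_input{"x", "w", "out@GRAD"};
  std::unordered_set<std::string> grad_op_output{"x@GRAD", "w@GRAD", "out@GRAD"};

  // Anything read by a grad op but produced by no grad op came from the
  // forward pass and must not be eagerly deleted.
  std::vector<std::string> skip_vars;
  for (const auto &name : grad_op_input) {
    if (grad_op_output.count(name) == 0) {
      skip_vars.push_back(name);
    }
  }

  for (const auto &v : skip_vars) {
    std::cout << v << "\n";  // prints "x" and "w" (in unspecified order)
  }
}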
@@ -217,8 +180,6 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
       param_names = ctx.InputNames("Params");
     }
 
-    auto *block = ctx.Attr<BlockDesc *>("global_block");
-    auto *program = block->Program();
     auto start_op_index = ctx.Attr<int64_t>("start_op_index");
     auto end_op_index = ctx.Attr<int64_t>("end_op_index");
     auto is_test = ctx.Attr<bool>("is_test");
@@ -233,14 +194,8 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
     // Step 2. prepare executor and init persistable variables
     framework::Executor exe(ctx.GetPlace());
 
-    // skip delete vars
-    std::vector<std::string> skip_vars;
-    details::AppendSkipDeletionVars(output_var_names, &skip_vars);
-    VLOG(2) << "Prepare to skip " << skip_vars.size()
-            << " var(s): " << string::join_strings(skip_vars, ' ');
-
-    auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
+    auto exe_ctx = framework::GetExecutorInfoFromCache(
+        exe, ctx, {output_var_names}, /*is_grad=*/false);
 
     // NOTE(Aurelius84): While training some models, forward can be called many
     // times and then apply backpropagation all at once, such as Reinforcement
@@ -259,7 +214,8 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
     // Step 3. run ops
     exe.RunPartialPreparedContext(exe_ctx.get(), &scope, start_op_index,
                                   end_op_index, /*create_local_scope=*/false,
                                   /*create_vars=*/true, /*keep_kids=*/!is_test);
 
     // Step 4. Get Output
     details::ShareVarsFromScope(output_vars, output_var_names, &scope);
@@ -305,8 +261,6 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
     }
 
-    auto *block = ctx.Attr<BlockDesc *>("global_block");
-    auto *program = block->Program();
     auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
 
     // NOTE: skip `shape` and `fill_constant` op created by
     // fluid.backward.gradients, one forward output will generate one `shape`
@@ -332,20 +286,12 @@ class RunProgramGradOpKernel : public framework::OpKernel<T> {
     // Step 2. prepare executor and scope
     framework::Executor exe(ctx.GetPlace());
 
-    // skip delete vars
-    std::vector<std::string> skip_vars;
-    details::AppendSkipDeletionVars(input_grad_var_names, &skip_vars);
-    details::AppendSkipDeletionVars(param_grad_names, &skip_vars);
-    details::AppendSafeEagerDeletionSkipVars(*program, &skip_vars);
-    VLOG(2) << "Prepare to skip " << skip_vars.size()
-            << " var(s): " << string::join_strings(skip_vars, ' ');
-
-    auto exe_ctx = exe.Prepare(*program, 0, skip_vars);
+    auto exe_ctx = framework::GetExecutorInfoFromCache(
+        exe, ctx, {input_grad_var_names, param_grad_names}, /*is_grad=*/true);
 
     details::ShareVarsIntoScope(output_grad_vars, output_grad_var_names, &scope);
 
     // Debug info: scope info when run end
     VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
python/paddle/fluid/tests/unittests/test_run_program_op.py

@@ -167,6 +167,9 @@ class RunProgramOpTest(unittest.TestCase):
         return outputs
 
     def calc_dygraph_output(self, place):
+        self.program_desc, self.fwd_op_num = self.get_program_desc()
+        self.attrs = self.prepare_attrs()
+
         with fluid.dygraph.guard(place):
             inputs = self.prepare_dygraph_input(place)
             outputs = self.prepare_dygraph_output()

@@ -179,6 +182,9 @@ class RunProgramOpTest(unittest.TestCase):
         return outputs['Out']
 
     def calc_dygraph_grad(self, place):
+        self.program_desc, self.fwd_op_num = self.get_program_desc()
+        self.attrs = self.prepare_attrs()
+
         with fluid.dygraph.guard(place):
             # Step 1. run forward
             inputs, input_param_list = self.prepare_dygraph_input(place, True)

@@ -241,10 +247,6 @@ class TestRunProgramOpWithFC(RunProgramOpTest):
             }
         }
 
-        self.program_desc, self.fwd_op_num = self.get_program_desc()
-        self.attrs = self.prepare_attrs()
-
     def test_check_output(self):
         self.check_output()

@@ -298,10 +300,6 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
             }
         }
 
-        self.program_desc, self.fwd_op_num = self.get_program_desc()
-        self.attrs = self.prepare_attrs()
-
     def test_check_output(self):
         self.check_output()