Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
f720e231
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f720e231
编写于
6月 30, 2022
作者:
R
Ruibiao Chen
提交者:
GitHub
6月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove boost::variant for FetchResultType (#43932)
* Remove boost::variant for FetchResultType * Fix pybind errors
上级
6467ca0d
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
90 addition
and
61 deletion
+90
-61
paddle/fluid/framework/details/async_ssa_graph_executor.cc
paddle/fluid/framework/details/async_ssa_graph_executor.cc
+1
-1
paddle/fluid/framework/details/fetch_async_op_handle.cc
paddle/fluid/framework/details/fetch_async_op_handle.cc
+2
-2
paddle/fluid/framework/details/fetch_op_handle.cc
paddle/fluid/framework/details/fetch_op_handle.cc
+3
-3
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
...le/fluid/framework/details/parallel_ssa_graph_executor.cc
+3
-2
paddle/fluid/framework/feed_fetch_type.h
paddle/fluid/framework/feed_fetch_type.h
+1
-1
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+54
-32
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+4
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+22
-18
未找到文件。
paddle/fluid/framework/details/async_ssa_graph_executor.cc
浏览文件 @
f720e231
...
...
@@ -174,7 +174,7 @@ FetchResultType AsyncSSAGraphExecutor::Run(
HandleException
();
FetchList
ret
;
auto
&
val
=
boost
::
get
<
FetchList
>
(
fetch_data
);
auto
&
val
=
BOOST_GET
(
FetchList
,
fetch_data
);
for
(
size_t
fetch_idx
=
0
;
fetch_idx
<
fetch_tensors
.
size
();
++
fetch_idx
)
{
if
(
data_is_lod_tensor
(
val
.
at
(
fetch_idx
)))
{
std
::
vector
<
const
LoDTensor
*>
lodtensor_ptrs
;
...
...
paddle/fluid/framework/details/fetch_async_op_handle.cc
浏览文件 @
f720e231
...
...
@@ -228,7 +228,7 @@ void FetchAsyncOpHandle::RunImpl() {
}
if
(
return_merged_
)
{
auto
&
val
=
boost
::
get
<
FetchList
>
(
*
data_
);
auto
&
val
=
BOOST_GET
(
FetchList
,
*
data_
);
if
(
src_vars
[
0
]
->
IsType
<
LoDTensor
>
())
{
// to lodtensor type
std
::
vector
<
const
LoDTensor
*>
src_lodtensors
;
...
...
@@ -263,7 +263,7 @@ void FetchAsyncOpHandle::RunImpl() {
val
.
at
(
offset_
)
=
std
::
move
(
dst_lodtensor_array
);
}
}
else
{
auto
&
val
=
boost
::
get
<
FetchUnmergedList
>
(
*
data_
);
auto
&
val
=
BOOST_GET
(
FetchUnmergedList
,
*
data_
);
auto
&
dst_tensors
=
val
.
at
(
offset_
);
dst_tensors
.
reserve
(
src_vars
.
size
());
...
...
paddle/fluid/framework/details/fetch_op_handle.cc
浏览文件 @
f720e231
...
...
@@ -84,7 +84,7 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
for
(
auto
&
t
:
tensors_
)
{
tensors_ptr
.
emplace_back
(
&
BOOST_GET_CONST
(
LoDTensor
,
t
));
}
auto
&
val
=
boost
::
get
<
FetchList
>
(
*
data_
);
auto
&
val
=
BOOST_GET
(
FetchList
,
*
data_
);
LoDTensor
var
;
MergeLoDTensor
(
&
var
,
tensors_ptr
,
platform
::
CPUPlace
());
val
.
at
(
offset_
)
=
std
::
move
(
var
);
...
...
@@ -106,11 +106,11 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
tmp_array
.
emplace_back
();
MergeLoDTensor
(
&
(
tmp_array
.
back
()),
tensors_ptr
,
platform
::
CPUPlace
());
}
auto
&
val
=
boost
::
get
<
FetchList
>
(
*
data_
);
auto
&
val
=
BOOST_GET
(
FetchList
,
*
data_
);
val
.
at
(
offset_
)
=
std
::
move
(
tmp_array
);
}
}
else
{
auto
&
val
=
boost
::
get
<
FetchUnmergedList
>
(
*
data_
);
auto
&
val
=
BOOST_GET
(
FetchUnmergedList
,
*
data_
);
val
.
at
(
offset_
)
=
std
::
move
(
tensors_
);
}
}
...
...
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
浏览文件 @
f720e231
...
...
@@ -278,7 +278,8 @@ FetchResultType ParallelSSAGraphExecutor::Run(
if
(
!
is_valid
[
scope_idx
])
{
continue
;
}
const
auto
&
fetch_list
=
boost
::
get
<
FetchList
>
(
fetch_data
[
scope_idx
]);
const
auto
&
fetch_list
=
BOOST_GET_CONST
(
FetchList
,
fetch_data
[
scope_idx
]);
if
(
data_is_lod_tensor
(
fetch_list
[
fetch_idx
]))
{
lodtensor_ptrs
.
push_back
(
&
(
BOOST_GET_CONST
(
LoDTensor
,
fetch_list
[
fetch_idx
])));
...
...
@@ -317,7 +318,7 @@ FetchResultType ParallelSSAGraphExecutor::Run(
continue
;
}
const
auto
&
fetch_list
=
boost
::
get
<
FetchUnmergedList
>
(
fetch_data
[
scope_idx
]);
BOOST_GET_CONST
(
FetchUnmergedList
,
fetch_data
[
scope_idx
]);
PADDLE_ENFORCE_EQ
(
fetch_list
[
fetch_idx
].
size
(),
1
,
...
...
paddle/fluid/framework/feed_fetch_type.h
浏览文件 @
f720e231
...
...
@@ -30,7 +30,7 @@ using FetchType = paddle::variant<LoDTensor, LoDTensorArray, framework::Vocab>;
using
FetchList
=
std
::
vector
<
FetchType
>
;
using
FetchUnmergedList
=
std
::
vector
<
std
::
vector
<
FetchType
>>
;
using
FetchResultType
=
boost
::
variant
<
FetchList
,
FetchUnmergedList
>
;
using
FetchResultType
=
paddle
::
variant
<
FetchList
,
FetchUnmergedList
>
;
inline
bool
data_is_lod_tensor
(
const
FetchType
&
data
)
{
if
(
data
.
type
()
==
typeid
(
LoDTensor
))
{
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
f720e231
...
...
@@ -972,37 +972,26 @@ void ParallelExecutor::BCastParamsToDevices(
}
}
FetchResultType
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
bool
return_merged
)
{
platform
::
RecordEvent
record_run
(
"ParallelExecutor::Run"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
VLOG
(
3
)
<<
"enter ParallelExecutor Run"
;
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
IsCUDAGraphCapturing
())
{
PADDLE_ENFORCE_EQ
(
fetch_tensors
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Cannot fetch data when using CUDA Graph."
));
PADDLE_ENFORCE_EQ
(
member_
->
build_strategy_
.
allow_cuda_graph_capture_
,
true
,
platform
::
errors
::
InvalidArgument
(
"You must turn on build_strategy.allow_cuda_graph_capture = True "
"to enable CUDA Graph capturing."
));
PADDLE_ENFORCE_EQ
(
member_
->
places_
[
0
],
platform
::
CUDAGraphCapturingPlace
(),
platform
::
errors
::
InvalidArgument
(
"The place to capture CUDAGraph is "
"not the same as the place to run."
));
}
#endif
FetchUnmergedList
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
{
PreludeToRun
(
fetch_tensors
);
platform
::
RecordBlock
b
(
0
);
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
}
#endif
ResetHasFeedGuard
reset_has_feed_guard
(
member_
);
ir
::
SkipMemOptVarsGuard
guard
(
&
(
member_
->
mem_opt_var_infos_
),
fetch_tensors
,
member_
->
HasGarbageCollectors
());
VLOG
(
3
)
<<
"ParallelExecutor begin to run member_->executor_->Run"
;
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
,
/*return_merged=*/
false
);
return
BOOST_GET
(
FetchUnmergedList
,
fetch_data
);
}
FetchList
ParallelExecutor
::
RunAndMerge
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
{
PreludeToRun
(
fetch_tensors
);
platform
::
RecordBlock
b
(
0
);
ResetHasFeedGuard
reset_has_feed_guard
(
member_
);
...
...
@@ -1011,9 +1000,10 @@ FetchResultType ParallelExecutor::Run(
fetch_tensors
,
member_
->
HasGarbageCollectors
());
VLOG
(
3
)
<<
"ParallelExecutor begin to run member_->executor_->Run"
;
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
,
return_merged
);
return
fetch_data
;
VLOG
(
3
)
<<
"ParallelExecutor begin to run member_->executor_->RunAndMerge"
;
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
,
/*return_merged=*/
true
);
return
BOOST_GET
(
FetchList
,
fetch_data
);
}
void
ParallelExecutor
::
RunWithoutFetch
(
...
...
@@ -1440,6 +1430,38 @@ std::vector<ir::Graph *> ParallelExecutor::CloneGraphToMultiDevices(
return
graphs
;
}
void
ParallelExecutor
::
PreludeToRun
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
{
platform
::
RecordEvent
record_run
(
"ParallelExecutor::Run"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
VLOG
(
3
)
<<
"enter ParallelExecutor Run"
;
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
IsCUDAGraphCapturing
())
{
PADDLE_ENFORCE_EQ
(
fetch_tensors
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Cannot fetch data when using CUDA Graph."
));
PADDLE_ENFORCE_EQ
(
member_
->
build_strategy_
.
allow_cuda_graph_capture_
,
true
,
platform
::
errors
::
InvalidArgument
(
"You must turn on build_strategy.allow_cuda_graph_capture = True "
"to enable CUDA Graph capturing."
));
PADDLE_ENFORCE_EQ
(
member_
->
places_
[
0
],
platform
::
CUDAGraphCapturingPlace
(),
platform
::
errors
::
InvalidArgument
(
"The place to capture CUDAGraph is "
"not the same as the place to run."
));
}
#endif
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
}
#endif
}
void
ParallelExecutor
::
PrepareNCCLCommunicator
(
Scope
*
global_scope
)
{
if
(
member_
->
build_strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kNoReduce
)
{
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
f720e231
...
...
@@ -89,8 +89,8 @@ class ParallelExecutor {
void
FeedAndSplitTensorIntoLocalScopes
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
tensors
);
Fetch
ResultType
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
bool
return_merged
=
true
);
Fetch
UnmergedList
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
);
FetchList
RunAndMerge
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
);
void
RunWithoutFetch
(
const
std
::
vector
<
std
::
string
>
&
skip_eager_vars
);
...
...
@@ -126,6 +126,8 @@ class ParallelExecutor {
std
::
vector
<
ir
::
Graph
*>
CloneGraphToMultiDevices
(
ir
::
Graph
*
graph
);
void
PreludeToRun
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
);
void
PrepareNCCLCommunicator
(
Scope
*
global_scope
);
std
::
vector
<
ir
::
Graph
*>
CompileGraphWithBuildStrategy
(
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
f720e231
...
...
@@ -3225,13 +3225,17 @@ All parameter, weight, gradient are variables in Paddle.
#endif
m
.
def
(
"set_feed_variable"
,
static_cast
<
void
(
*
)(
Scope
*
,
const
LoDTensor
&
,
const
std
::
string
&
,
size_t
)
>
(
&
framework
::
SetFeedVariable
));
static_cast
<
void
(
*
)(
// NOLINT
Scope
*
,
const
LoDTensor
&
,
const
std
::
string
&
,
size_t
)
>
(
&
framework
::
SetFeedVariable
));
m
.
def
(
"set_feed_variable"
,
static_cast
<
void
(
*
)(
Scope
*
,
const
Strings
&
,
const
std
::
string
&
,
size_t
)
>
(
&
framework
::
SetFeedVariable
));
static_cast
<
void
(
*
)(
// NOLINT
Scope
*
,
const
Strings
&
,
const
std
::
string
&
,
size_t
)
>
(
&
framework
::
SetFeedVariable
));
m
.
def
(
"get_fetch_variable"
,
[](
const
Scope
&
scope
,
const
std
::
string
&
var_name
,
...
...
@@ -4601,20 +4605,20 @@ All parameter, weight, gradient are variables in Paddle.
[](
ParallelExecutor
&
self
,
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
bool
return_merged
)
->
py
::
object
{
paddle
::
framework
::
FetchResultType
ret
;
{
pybind11
::
gil_scoped_release
release
;
ret
=
self
.
Run
(
fetch_tensors
,
return_merged
);
}
// TODO(Ruibiao): Refactor the run interface of PE to avoid use
// boost::get here
if
(
return_merged
)
{
return
py
::
cast
(
std
::
move
(
boost
::
get
<
paddle
::
framework
::
FetchList
>
(
ret
)));
paddle
::
framework
::
FetchList
ret
;
/*gil_scoped_release*/
{
pybind11
::
gil_scoped_release
release
;
ret
=
self
.
RunAndMerge
(
fetch_tensors
);
}
return
py
::
cast
(
std
::
move
(
ret
));
}
else
{
return
py
::
cast
(
std
::
move
(
boost
::
get
<
paddle
::
framework
::
FetchUnmergedList
>
(
ret
)));
paddle
::
framework
::
FetchUnmergedList
ret
;
/*gil_scoped_release*/
{
pybind11
::
gil_scoped_release
release
;
ret
=
self
.
Run
(
fetch_tensors
);
}
return
py
::
cast
(
std
::
move
(
ret
));
}
})
.
def
(
"device_count"
,
&
ParallelExecutor
::
DeviceCount
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录