Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
5303b66b
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2297
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
5303b66b
编写于
10月 12, 2022
作者:
L
Leo Chen
提交者:
GitHub
10月 12, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
clean code of interpretercore (#46891)
* refactor * refine code
上级
21fab90d
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
170 addition
and
204 deletion
+170
-204
paddle/fluid/framework/new_executor/data_transfer.cc
paddle/fluid/framework/new_executor/data_transfer.cc
+2
-2
paddle/fluid/framework/new_executor/interpretercore.cc
paddle/fluid/framework/new_executor/interpretercore.cc
+93
-108
paddle/fluid/framework/new_executor/interpretercore.h
paddle/fluid/framework/new_executor/interpretercore.h
+29
-31
paddle/fluid/framework/new_executor/interpretercore_util.cc
paddle/fluid/framework/new_executor/interpretercore_util.cc
+31
-45
paddle/fluid/framework/new_executor/interpretercore_util.h
paddle/fluid/framework/new_executor/interpretercore_util.h
+14
-17
paddle/fluid/framework/new_executor/new_executor_defs.h
paddle/fluid/framework/new_executor/new_executor_defs.h
+1
-1
未找到文件。
paddle/fluid/framework/new_executor/data_transfer.cc
浏览文件 @
5303b66b
...
...
@@ -378,7 +378,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
"Required src_place shall be different with dst_place, "
"but received same place: %s"
,
src_place
));
if
(
IsSupportedHetePlace
(
dst_place
))
{
if
(
IsSupportedHete
r
Place
(
dst_place
))
{
op_type
=
kMemcpyH2D
;
int
dst_place_type
=
platform
::
is_gpu_place
(
dst_place
)
?
0
:
platform
::
is_npu_place
(
dst_place
)
?
1
...
...
@@ -387,7 +387,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
:
platform
::
is_custom_place
(
dst_place
)
?
6
:
-
1
;
attr_map
=
{{
"dst_place_type"
,
dst_place_type
}};
}
else
if
(
IsSupportedHetePlace
(
src_place
))
{
}
else
if
(
IsSupportedHete
r
Place
(
src_place
))
{
op_type
=
kMemcpyD2H
;
int
dst_place_type
=
platform
::
is_cpu_place
(
dst_place
)
?
0
:
platform
::
is_cuda_pinned_place
(
dst_place
)
?
1
...
...
paddle/fluid/framework/new_executor/interpretercore.cc
浏览文件 @
5303b66b
...
...
@@ -57,6 +57,50 @@ constexpr const char* kTaskCompletion = "TaskCompletion";
namespace
paddle
{
namespace
framework
{
// Makes the device identified by `place` current by forwarding to the
// backend-specific setter:
//   GPU    -> platform::SetDeviceId(place.device)
//   XPU    -> platform::SetXPUDeviceId(place.device)
//   NPU    -> platform::SetNPUDeviceId(place.device)
//   custom -> phi::DeviceManager::SetDevice(place)
// If Paddle was compiled without the backend that `place` requires, an
// Unavailable error is raised through PADDLE_THROW instead. A place that
// matches none of the four checks is left alone (the function does nothing).
inline void SetDeviceId(const platform::Place& place) {
  // TODO(zhiqiu): reduce the cost
  if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    platform::SetDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with CUDA support.",
        place));
#endif
    return;
  }

  if (platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
    platform::SetXPUDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with XPU support.",
        place));
#endif
    return;
  }

  if (platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
    platform::SetNPUDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with NPU support.",
        place));
#endif
    return;
  }

  if (platform::is_custom_place(place)) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
    phi::DeviceManager::SetDevice(place);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with CustomDevice support.",
        place));
#endif
    return;
  }
}
// TODO(Ruibia): Pass skip_gc_vars, used_for_jit, and other config messages by
// constructing an interpreter::ExecutionConfig
InterpreterCore
::
InterpreterCore
(
const
platform
::
Place
&
place
,
...
...
@@ -71,8 +115,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
stream_analyzer_
(
place
)
{
VLOG
(
4
)
<<
"InterpreterCore(): "
<<
this
<<
" on "
<<
place_
;
is_build_
=
false
;
exception_notifier_
=
main_thread_blocker_
.
RegisterEvent
(
kExceptionCaught
);
completion_notifier_
=
main_thread_blocker_
.
RegisterEvent
(
kTaskCompletion
);
...
...
@@ -87,12 +129,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
local_scope_
=
local_scope
;
}
var_scope_
.
SetLocalScope
(
local_scope_
);
// prune
// optimize graph pass
// convert to run graph
}
InterpreterCore
::~
InterpreterCore
()
{
...
...
@@ -111,11 +147,8 @@ InterpreterCore::~InterpreterCore() {
interpreter
::
CostInfo
InterpreterCore
::
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
phi
::
DenseTensor
>&
feed_tensors
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
place_
.
device
);
}
#endif
SetDeviceId
(
place_
);
Prepare
(
feed_names
,
feed_tensors
,
true
);
interpreter
::
CostInfo
cost_info
;
{
...
...
@@ -135,7 +168,7 @@ interpreter::CostInfo InterpreterCore::DryRun(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
}
if
(
execution_config_
.
create_local_scope
)
{
if
(
HasLocalScope
()
)
{
ClearLoDTensorArrayInLocalScope
();
}
...
...
@@ -145,11 +178,7 @@ interpreter::CostInfo InterpreterCore::DryRun(
paddle
::
framework
::
FetchList
InterpreterCore
::
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
phi
::
DenseTensor
>&
feed_tensors
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
place_
.
device
);
}
#endif
SetDeviceId
(
place_
);
#ifdef PADDLE_WITH_MKLDNN
platform
::
AttachPointerHashToMKLDNNKey
(
this
,
place_
);
...
...
@@ -181,7 +210,7 @@ paddle::framework::FetchList InterpreterCore::Run(
}
#endif
}
if
(
execution_config_
.
create_local_scope
)
{
if
(
HasLocalScope
()
)
{
ClearLoDTensorArrayInLocalScope
();
}
...
...
@@ -196,11 +225,7 @@ paddle::framework::FetchList InterpreterCore::Run(
paddle
::
framework
::
FetchList
InterpreterCore
::
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
place_
.
device
);
}
#endif
SetDeviceId
(
place_
);
#ifdef PADDLE_WITH_MKLDNN
platform
::
AttachPointerHashToMKLDNNKey
(
this
,
place_
);
...
...
@@ -208,17 +233,17 @@ paddle::framework::FetchList InterpreterCore::Run(
if
(
!
is_build_
)
{
LOG_FIRST_N
(
INFO
,
1
)
<<
"New Executor is Running."
;
paddle
::
framework
::
interpreter
::
build_variable_s
cope
(
block_
,
&
var_scope_
,
execution_config_
.
create_local_scope
);
paddle
::
framework
::
interpreter
::
BuildVariableS
cope
(
block_
,
&
var_scope_
,
HasLocalScope
()
);
std
::
vector
<
paddle
::
framework
::
OpFuncNode
>
op_func_nodes
;
paddle
::
framework
::
interpreter
::
build_op_func_l
ist
(
paddle
::
framework
::
interpreter
::
BuildOpFuncL
ist
(
place_
,
block_
,
execution_config_
.
skip_gc_vars
,
&
op_func_nodes
,
&
var_scope_
,
execution_config_
.
create_local_scope
,
HasLocalScope
()
,
execution_config_
.
used_for_jit
);
is_build_
=
true
;
SetFeedVarsInplaceSkip
(
feed_names
);
...
...
@@ -248,13 +273,13 @@ paddle::framework::FetchList InterpreterCore::Run(
#endif
}
if
(
execution_config_
.
create_local_scope
)
{
if
(
HasLocalScope
()
)
{
ClearLoDTensorArrayInLocalScope
();
}
// return Fetch Tensors
Scope
*
inner_scope
=
execution_config_
.
create_local_scope
?
local_scope_
:
var_scope_
.
GetMutableScope
();
Scope
*
inner_scope
=
HasLocalScope
()
?
local_scope_
:
var_scope_
.
GetMutableScope
();
auto
*
fetch_var
=
inner_scope
->
FindVar
(
interpreter
::
kFetchVarName
);
if
(
fetch_var
)
{
return
std
::
move
(
*
fetch_var
->
GetMutable
<
framework
::
FetchList
>
());
...
...
@@ -327,9 +352,8 @@ std::shared_ptr<interpreter::AsyncWorkQueue> InterpreterCore::GetWorkQueue() {
}
void
InterpreterCore
::
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
)
{
Scope
*
inner_scope
=
execution_config_
.
create_local_scope
?
local_scope_
:
var_scope_
.
GetMutableScope
();
Scope
*
inner_scope
=
HasLocalScope
()
?
local_scope_
:
var_scope_
.
GetMutableScope
();
VariableValueMap
ins_map
;
for
(
auto
&
var_name_item
:
instr_node
->
Inputs
())
{
std
::
vector
<
Variable
*>
input_vars
;
...
...
@@ -355,9 +379,8 @@ void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* instr_node) {
// set runtime_ctx and infershape_ctx_
if
(
instr_node
->
OpBase
()
->
Type
()
==
"cinn_launch"
)
{
// OP use scope in
// kernel
Scope
*
local_scope
=
execution_config_
.
create_local_scope
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
Scope
*
local_scope
=
HasLocalScope
()
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
instr_node
->
ResetContextWithScope
(
ins_map
,
outs_map
,
*
local_scope
);
}
else
{
instr_node
->
ResetContext
(
ins_map
,
outs_map
);
...
...
@@ -387,9 +410,8 @@ void InterpreterCore::BuildInplace() {
}
}
Scope
*
local_scope
=
execution_config_
.
create_local_scope
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
Scope
*
local_scope
=
HasLocalScope
()
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
std
::
vector
<
std
::
vector
<
size_t
>>
input_var2op
(
var_scope_
.
VarSize
());
for
(
Instruction
&
instr
:
vec_instruction_
)
{
for
(
auto
&
item
:
instr
.
Inputs
())
{
...
...
@@ -524,9 +546,8 @@ void InterpreterCore::Convert(
}
for
(
auto
var_id
:
gc_check_vars
)
{
Scope
*
inner_scope
=
execution_config_
.
create_local_scope
?
local_scope_
:
var_scope_
.
GetMutableScope
();
Scope
*
inner_scope
=
HasLocalScope
()
?
local_scope_
:
var_scope_
.
GetMutableScope
();
paddle
::
framework
::
Variable
*
var
=
inner_scope
->
FindVar
(
var_scope_
.
GetNameById
(
var_id
));
if
(
var
->
IsType
<
LoDTensor
>
()
||
var
->
IsType
<
phi
::
SelectedRows
>
()
||
...
...
@@ -629,56 +650,11 @@ void InterpreterCore::BuildSkipShareLoDInfo() {
}
}
// Makes the device identified by `place` current by forwarding to the
// backend-specific setter:
//   GPU    -> platform::SetDeviceId(place.device)
//   XPU    -> platform::SetXPUDeviceId(place.device)
//   NPU    -> platform::SetNPUDeviceId(place.device)
//   custom -> phi::DeviceManager::SetDevice(place)
// If Paddle was compiled without the backend that `place` requires, an
// Unavailable error is raised through PADDLE_THROW instead. A place that
// matches none of the four checks is left alone (the function does nothing).
inline void SetDeviceId(const platform::Place& place) {
  // TODO(zhiqiu): reduce the cost
  if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    platform::SetDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with CUDA support.",
        place));
#endif
    return;
  }

  if (platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
    platform::SetXPUDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with XPU support.",
        place));
#endif
    return;
  }

  if (platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
    platform::SetNPUDeviceId(place.device);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with NPU support.",
        place));
#endif
    return;
  }

  if (platform::is_custom_place(place)) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
    phi::DeviceManager::SetDevice(place);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Cannot run operator on place %s, please recompile paddle or "
        "reinstall Paddle with CustomDevice support.",
        place));
#endif
    return;
  }
}
void
InterpreterCore
::
RunInstruction
(
const
Instruction
&
instr_node
)
{
auto
*
op
=
instr_node
.
OpBase
();
auto
place
=
instr_node
.
DeviceContext
().
GetPlace
();
Scope
*
local_scope
=
execution_config_
.
create_local_scope
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
Scope
*
local_scope
=
HasLocalScope
()
?
var_scope_
.
GetMutableLocalScope
()
:
var_scope_
.
GetMutableScope
();
VLOG
(
4
)
<<
"Start run "
<<
place
<<
" "
<<
op
->
DebugStringEx
(
local_scope_
);
SetDeviceId
(
place
);
...
...
@@ -800,8 +776,8 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
void
InterpreterCore
::
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
)
{
interpreter
::
ResetAtomicGuard
guard
(
&
deps_
,
&
refs_
);
unfinished_op_numer_
=
vec_instr
.
size
();
if
(
unfinished_op_numer_
==
0
)
{
unfinished_op_num
b
er_
=
vec_instr
.
size
();
if
(
unfinished_op_num
b
er_
==
0
)
{
VLOG
(
4
)
<<
"No op to run, return"
;
return
;
}
...
...
@@ -878,8 +854,12 @@ void InterpreterCore::RunNextInstructions(
[
this
,
next_id
]
{
RunInstructionAsync
(
next_id
);
});
}
}
auto
direct_run_ops
=
interpreter
::
merge_vector
(
next_instr
.
SyncRunIds
(),
next_instr
.
DirectRunIds
());
std
::
vector
<
size_t
>
direct_run_ops
=
next_instr
.
SyncRunIds
();
direct_run_ops
.
insert
(
direct_run_ops
.
end
(),
next_instr
.
DirectRunIds
().
begin
(),
next_instr
.
DirectRunIds
().
end
());
int64_t
first_op
=
-
1
;
for
(
auto
next_id
:
direct_run_ops
)
{
if
(
IsReady
(
next_id
))
{
...
...
@@ -949,9 +929,9 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
return
;
}
VLOG
(
4
)
<<
"unfinished_op_num
er_: "
<<
unfinished_op_num
er_
;
if
(
UNLIKELY
(
unfinished_op_num
er_
.
fetch_sub
(
1
,
std
::
memory_order_relaxed
)
==
1
))
{
VLOG
(
4
)
<<
"unfinished_op_num
ber_: "
<<
unfinished_op_numb
er_
;
if
(
UNLIKELY
(
unfinished_op_num
ber_
.
fetch_sub
(
1
,
std
::
memory_order_relaxed
)
==
1
))
{
if
(
completion_notifier_
!=
nullptr
)
{
completion_notifier_
->
NotifyEvent
();
}
...
...
@@ -961,8 +941,11 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void
InterpreterCore
::
RecordStreamForGC
(
const
Instruction
&
instr
)
{
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"RecordStreamForGC is only implemented when compiled with GPU."
));
#else
if
(
!
IsInterpretercoreFastGCEnabled
()
||
instr
.
KernelType
()
!=
OpFuncType
::
kQueueAsync
)
{
return
;
...
...
@@ -1053,8 +1036,8 @@ void InterpreterCore::RecordStreamForGC(const Instruction& instr) {
framework
::
ToTypeName
(
var
->
Type
())));
}
}
}
#endif
}
void
InterpreterCore
::
CheckGC
(
const
Instruction
&
instr
)
{
platform
::
RecordEvent
record
(
...
...
@@ -1106,17 +1089,17 @@ void InterpreterCore::Prepare(const std::vector<std::string>& feed_names,
};
if
(
!
is_build_
)
{
paddle
::
framework
::
interpreter
::
build_variable_s
cope
(
block_
,
&
var_scope_
,
execution_config_
.
create_local_scope
);
paddle
::
framework
::
interpreter
::
BuildVariableS
cope
(
block_
,
&
var_scope_
,
HasLocalScope
()
);
FeedInput
();
std
::
vector
<
paddle
::
framework
::
OpFuncNode
>
op_func_nodes
;
paddle
::
framework
::
interpreter
::
build_op_func_l
ist
(
paddle
::
framework
::
interpreter
::
BuildOpFuncL
ist
(
place_
,
block_
,
execution_config_
.
skip_gc_vars
,
&
op_func_nodes
,
&
var_scope_
,
execution_config_
.
create_local_scope
,
HasLocalScope
()
,
execution_config_
.
used_for_jit
);
is_build_
=
true
;
SetFeedVarsInplaceSkip
(
feed_names
);
...
...
@@ -1124,7 +1107,7 @@ void InterpreterCore::Prepare(const std::vector<std::string>& feed_names,
Convert
(
&
op_func_nodes
);
}
// NOTE: Because feed_tensor will be GC after
// paddle::framework::
build_op_func_l
ist, so we should
// paddle::framework::
BuildOpFuncL
ist, so we should
// call FeedInput again.
if
(
prepare_feed
)
{
FeedInput
();
...
...
@@ -1138,6 +1121,8 @@ void InterpreterCore::SetFeedVarsInplaceSkip(
}
}
// Reports whether this interpreter currently holds a local scope, i.e.
// whether the local_scope_ pointer is non-null.
bool
InterpreterCore
::
HasLocalScope
()
const
{
return
local_scope_
!=
nullptr
;
}
std
::
shared_ptr
<
InterpreterCore
>
CreateInterpreterCore
(
const
platform
::
Place
&
place
,
const
ProgramDesc
&
prog
,
...
...
@@ -1145,11 +1130,11 @@ std::shared_ptr<InterpreterCore> CreateInterpreterCore(
const
std
::
vector
<
std
::
string
>&
fetch_names
,
const
std
::
set
<
std
::
string
>&
skip_gc_vars
)
{
std
::
shared_ptr
<
InterpreterCore
>
core
=
nullptr
;
// NOTE(Aurelius84): `
add_f
etch` will modify BlockDesc, so we should copy
// NOTE(Aurelius84): `
AddF
etch` will modify BlockDesc, so we should copy
// a new program.
auto
new_prog
=
std
::
make_shared
<
framework
::
ProgramDesc
>
(
prog
);
auto
*
block
=
new_prog
->
MutableBlock
(
0
);
interpreter
::
add_f
etch
(
fetch_names
,
block
);
interpreter
::
AddF
etch
(
fetch_names
,
block
);
core
=
std
::
make_shared
<
InterpreterCore
>
(
place
,
*
block
,
skip_gc_vars
,
scope
);
core
->
SetCopyProgram
(
new_prog
);
...
...
paddle/fluid/framework/new_executor/interpretercore.h
浏览文件 @
5303b66b
...
...
@@ -68,45 +68,42 @@ class InterpreterCore {
void
reset_scope
(
Scope
*
new_scope
);
private:
bool
BuildInplaceCheckVarIsOnlyInput
(
const
std
::
vector
<
std
::
vector
<
size_t
>>&
input_var2op
,
size_t
var_index
);
std
::
shared_ptr
<
interpreter
::
AsyncWorkQueue
>
GetWorkQueue
();
// build graph
void
Convert
(
std
::
vector
<
paddle
::
framework
::
OpFuncNode
>*
op_func_nodes
);
void
BuildOperatorDependences
();
void
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
);
void
BuildSkipShareLoDInfo
();
// inplace
void
BuildInplace
();
bool
BuildInplaceCheckVarIsOnlyInput
(
const
std
::
vector
<
std
::
vector
<
size_t
>>&
input_var2op
,
size_t
var_index
);
void
SetFeedVarsInplaceSkip
(
const
std
::
vector
<
std
::
string
>&
feed_names
);
void
BuildOperatorDependences
();
void
ClearLoDTensorArrayInLocalScope
();
void
Convert
(
std
::
vector
<
paddle
::
framework
::
OpFuncNode
>*
op_func_nodes
);
void
RunInstruction
(
const
Instruction
&
instr_node
);
// execution
void
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
);
void
RunInstructionAsync
(
size_t
instr_id
);
void
RunInstruction
(
const
Instruction
&
instr_node
);
void
RunNextInstructions
(
const
Instruction
&
instr_id
,
std
::
queue
<
size_t
>*
reserved_next_ops
);
// only used when program contains no feed op
void
Prepare
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
phi
::
DenseTensor
>&
feed_tensors
,
bool
prepare_feed
);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// gc
void
RecordStreamForGC
(
const
Instruction
&
instr
);
#endif
void
CheckGC
(
const
Instruction
&
instr
);
void
ClearLoDTensorArrayInLocalScope
();
void
RunInstructionAsync
(
size_t
instr_id
);
void
RunNextInstructions
(
const
Instruction
&
instr_id
,
std
::
queue
<
size_t
>*
reserved_next_ops
);
void
BuildSkipShareLoDInfo
();
// workqueue
std
::
shared_ptr
<
interpreter
::
AsyncWorkQueue
>
GetWorkQueue
();
void
SetFeedVarsInplaceSkip
(
const
std
::
vector
<
std
::
string
>&
feed_names
);
// scope
bool
HasLocalScope
()
const
;
private:
bool
is_build_
;
bool
is_build_
{
false
}
;
platform
::
Place
place_
;
const
BlockDesc
&
block_
;
// not owned
...
...
@@ -127,11 +124,7 @@ class InterpreterCore {
std
::
vector
<
Instruction
>
vec_instruction_
;
// deconstruct before OpFuncNode
// last_live_ops_[i] contains the id of operators that last access var[i]
std
::
map
<
size_t
,
std
::
set
<
size_t
>>
last_live_ops_
;
std
::
vector
<
size_t
>
dependecy_count_
;
std
::
atomic
<
size_t
>
unfinished_op_numer_
{
0
};
std
::
atomic
<
size_t
>
unfinished_op_number_
{
0
};
VariableScope
var_scope_
;
Scope
*
local_scope_
{
nullptr
};
// not owned
...
...
@@ -145,8 +138,13 @@ class InterpreterCore {
std
::
unique_ptr
<
InterpreterCoreGarbageCollector
>
gc_
;
std
::
future
<
std
::
unique_ptr
<
AtomicVectorSizeT
>>
atomic_deps_
;
std
::
future
<
std
::
unique_ptr
<
AtomicVectorSizeT
>>
atomic_var_ref_
;
// last_live_ops_[i] contains the id of operators that last access the i-th
// var
std
::
map
<
size_t
,
std
::
set
<
size_t
>>
last_live_ops_
;
// dependecy_count_[i] contains the number of dependencies that the i-th op
// needs to wait for
std
::
vector
<
size_t
>
dependecy_count_
;
std
::
vector
<
std
::
shared_ptr
<
interpreter
::
OpDepInfo
>>
deps_
;
std
::
vector
<
std
::
shared_ptr
<
interpreter
::
VarRefInfo
>>
refs_
;
...
...
paddle/fluid/framework/new_executor/interpretercore_util.cc
浏览文件 @
5303b66b
...
...
@@ -122,8 +122,8 @@ bool var_can_be_deleted(const std::string& name, const BlockDesc& block) {
std
::
unordered_map
<
const
paddle
::
framework
::
OperatorBase
*
,
std
::
vector
<
std
::
string
>>
get_unused_v
ars
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>&
ops
)
{
GetUnusedV
ars
(
const
BlockDesc
&
block
,
const
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>&
ops
)
{
std
::
unordered_map
<
std
::
string
,
size_t
>
var_op_idx_map
;
for
(
size_t
i
=
0
;
i
<
ops
.
size
();
++
i
)
{
...
...
@@ -166,17 +166,17 @@ get_unused_vars(const BlockDesc& block,
for
(
auto
&
name_op_idx_pair
:
var_op_idx_map
)
{
auto
&
name
=
name_op_idx_pair
.
first
;
size_t
op_idx
=
name_op_idx_pair
.
second
;
result
[
op
s
[
op_idx
].
get
()
].
emplace_back
(
name
);
VLOG
(
4
)
<<
op
s
[
op_idx
].
get
()
->
Type
()
<<
" "
<<
name
;
auto
op
=
ops
[
op_idx
].
get
();
result
[
op
].
emplace_back
(
name
);
VLOG
(
4
)
<<
op
->
Type
()
<<
" "
<<
name
;
}
VLOG
(
4
)
<<
"gc map size:"
<<
result
.
size
();
return
result
;
}
void
build_variable_s
cope
(
const
framework
::
BlockDesc
&
block
,
VariableScope
*
var_scope
,
bool
use_local_scope
)
{
void
BuildVariableS
cope
(
const
framework
::
BlockDesc
&
block
,
VariableScope
*
var_scope
,
bool
use_local_scope
)
{
VLOG
(
3
)
<<
"Creating Variables"
;
auto
inner_scope
=
var_scope
->
GetMutableScope
();
...
...
@@ -214,8 +214,8 @@ void build_variable_scope(const framework::BlockDesc& block,
}
}
void
create_all_o
ps
(
const
framework
::
BlockDesc
&
block
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>*
ops
)
{
void
CreateAllO
ps
(
const
framework
::
BlockDesc
&
block
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>*
ops
)
{
for
(
auto
&
op
:
block
.
AllOps
())
{
auto
op_type
=
op
->
Type
();
VLOG
(
8
)
<<
"CreateOp from : "
<<
op_type
;
...
...
@@ -289,9 +289,9 @@ std::tuple<VariableValueMap, VariableIdMap> BuildVariableMap(
return
std
::
make_tuple
(
name2var
,
name2id
);
}
void
apply_device_g
uard
(
const
OperatorBase
*
op_base
,
const
platform
::
Place
&
place
,
OpKernelType
*
expected_kernel_key
)
{
void
ApplyDeviceG
uard
(
const
OperatorBase
*
op_base
,
const
platform
::
Place
&
place
,
OpKernelType
*
expected_kernel_key
)
{
bool
need_change_place
=
(
op_base
->
HasAttr
(
"op_device"
)
&&
(
op_base
->
Attr
<
std
::
string
>
(
"op_device"
).
length
()
>
0
));
...
...
@@ -352,7 +352,7 @@ void apply_device_guard(const OperatorBase* op_base,
}
}
void
deal_operator_b
ase
(
const
platform
::
Place
&
place
,
void
HandleOperatorB
ase
(
const
platform
::
Place
&
place
,
const
VariableScope
*
var_scope
,
std
::
shared_ptr
<
OperatorBase
>
op_base
,
OpFuncNode
*
op_func_node
,
...
...
@@ -361,7 +361,7 @@ void deal_operator_base(const platform::Place& place,
auto
*
dev_ctx
=
pool
.
Get
(
place
);
// input, output is prepared. set the other attributes.
op_func_node
->
operator_base_
=
op_base
;
if
(
IsSupportedHetePlace
(
place
))
{
if
(
IsSupportedHete
r
Place
(
place
))
{
op_func_node
->
type_
=
OpFuncType
::
kQueueAsync
;
}
else
if
(
platform
::
is_cpu_place
(
place
))
{
op_func_node
->
type_
=
OpFuncType
::
kQueueSync
;
...
...
@@ -382,19 +382,19 @@ void deal_operator_base(const platform::Place& place,
op_func_node
->
dev_ctx_
=
dev_ctx
;
}
void
build_op_func_l
ist
(
const
platform
::
Place
&
place
,
const
framework
::
BlockDesc
&
block
,
const
std
::
set
<
std
::
string
>&
skip_gc_vars
,
std
::
vector
<
OpFuncNode
>*
vec_func_list
,
VariableScope
*
var_scope
,
bool
use_local_scope
,
bool
used_for_jit
)
{
void
BuildOpFuncL
ist
(
const
platform
::
Place
&
place
,
const
framework
::
BlockDesc
&
block
,
const
std
::
set
<
std
::
string
>&
skip_gc_vars
,
std
::
vector
<
OpFuncNode
>*
vec_func_list
,
VariableScope
*
var_scope
,
bool
use_local_scope
,
bool
used_for_jit
)
{
Scope
*
local_scope
=
use_local_scope
?
var_scope
->
GetMutableLocalScope
()
:
var_scope
->
GetMutableScope
();
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops_unique
;
// its elements will be moved to vec_func_list
// Step 1: create all ops for current block.
create_all_o
ps
(
block
,
&
ops_unique
);
CreateAllO
ps
(
block
,
&
ops_unique
);
if
(
!
used_for_jit
)
{
// If gc is enabled and block size > 1
...
...
@@ -415,7 +415,7 @@ void build_op_func_list(const platform::Place& place,
for
(
auto
&
op_unique
:
ops_unique
)
{
ops
.
emplace_back
(
std
::
move
(
op_unique
));
}
auto
unused_var_map
=
get_unused_v
ars
(
block
,
ops
);
auto
unused_var_map
=
GetUnusedV
ars
(
block
,
ops
);
bool
flag_log_is_printed
=
false
;
for
(
size_t
i
=
0
;
i
<
ops
.
size
();
++
i
)
{
...
...
@@ -485,10 +485,10 @@ void build_op_func_list(const platform::Place& place,
try
{
if
(
dynamic_cast
<
framework
::
OperatorWithKernel
*>
(
op
)
==
nullptr
)
{
VLOG
(
4
)
<<
"HandleOperatorBase"
;
// op is not an OperatorWithKernel, so directly run OperatorBase::Run()
deal_operator_b
ase
(
HandleOperatorB
ase
(
place
,
var_scope
,
ops
[
i
],
&
op_func_node
,
local_scope
);
VLOG
(
4
)
<<
"deal_operator_base"
;
}
else
{
VLOG
(
4
)
<<
"OP is not null"
;
auto
op_with_kernel
=
const_cast
<
framework
::
OperatorWithKernel
*>
(
...
...
@@ -522,7 +522,7 @@ void build_op_func_list(const platform::Place& place,
op_with_kernel
->
GetExpectedKernelType
(
exec_ctx
);
VLOG
(
4
)
<<
"get expected_kernel_key"
;
// change device by the device_guard()
apply_device_g
uard
(
op
,
place
,
&
expected_kernel_key
);
ApplyDeviceG
uard
(
op
,
place
,
&
expected_kernel_key
);
VLOG
(
4
)
<<
"expected_kernel_key : "
<<
expected_kernel_key
;
// step 2. select op kernel
...
...
@@ -565,7 +565,7 @@ void build_op_func_list(const platform::Place& place,
dev_ctx
=
pool
.
Get
(
kernel_type
.
place_
);
}
op_func_node
.
dev_ctx_
=
dev_ctx
;
if
(
IsSupportedHetePlace
(
kernel_type
.
place_
))
{
if
(
IsSupportedHete
r
Place
(
kernel_type
.
place_
))
{
op_func_node
.
type_
=
OpFuncType
::
kQueueAsync
;
}
else
if
(
platform
::
is_cpu_place
(
kernel_type
.
place_
))
{
op_func_node
.
type_
=
OpFuncType
::
kQueueSync
;
...
...
@@ -667,7 +667,7 @@ void build_op_func_list(const platform::Place& place,
vec_func_list
->
emplace_back
(
op_func_node
);
// gc---------------------------------------------
------------------------------
// gc---------------------------------------------
auto
iter
=
unused_var_map
.
find
(
op
);
if
(
iter
==
unused_var_map
.
end
())
{
interpreter
::
LogDeviceMemoryStats
(
place
);
...
...
@@ -702,8 +702,8 @@ void build_op_func_list(const platform::Place& place,
memory
::
Release
(
place
);
}
void
add_f
etch
(
const
std
::
vector
<
std
::
string
>&
fetch_names
,
framework
::
BlockDesc
*
block
)
{
void
AddF
etch
(
const
std
::
vector
<
std
::
string
>&
fetch_names
,
framework
::
BlockDesc
*
block
)
{
auto
*
fetch_holder
=
block
->
Var
(
kFetchVarName
);
fetch_holder
->
SetType
(
proto
::
VarType
::
FETCH_LIST
);
fetch_holder
->
SetPersistable
(
true
);
...
...
@@ -721,20 +721,6 @@ void add_fetch(const std::vector<std::string>& fetch_names,
}
}
// Merges two ascending id lists into one ascending list with no repeated
// values.
//
// Both `first` and `second` are expected to already be sorted in ascending
// order (a precondition of std::merge). The result holds every distinct
// value that occurs in either input, in ascending order.
std::vector<size_t> merge_vector(const std::vector<size_t>& first,
                                 const std::vector<size_t>& second) {
  // Pre-size the destination so std::merge can write straight into it.
  std::vector<size_t> merged(first.size() + second.size());
  std::merge(first.begin(),
             first.end(),
             second.begin(),
             second.end(),
             merged.begin());

  // After merging, equal values sit next to each other; std::unique compacts
  // them to the front and the leftover tail is dropped.
  auto unique_end = std::unique(merged.begin(), merged.end());
  merged.erase(unique_end, merged.end());
  return merged;
}
}
// namespace interpreter
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/new_executor/interpretercore_util.h
浏览文件 @
5303b66b
...
...
@@ -66,23 +66,20 @@ class AsyncWorkQueue {
void
LogDeviceMemoryStats
(
const
platform
::
Place
&
place
);
void
build_variable_scope
(
const
framework
::
BlockDesc
&
block
,
VariableScope
*
var_scope
,
bool
use_local_scope
=
true
);
void
build_op_func_list
(
const
platform
::
Place
&
place
,
const
framework
::
BlockDesc
&
block
,
const
std
::
set
<
std
::
string
>&
skip_gc_vars
,
std
::
vector
<
OpFuncNode
>*
vec_func_list
,
VariableScope
*
scope
,
bool
use_local_scope
=
true
,
bool
used_for_jit
=
false
);
void
add_fetch
(
const
std
::
vector
<
std
::
string
>&
fetch_names
,
framework
::
BlockDesc
*
block
);
std
::
vector
<
size_t
>
merge_vector
(
const
std
::
vector
<
size_t
>&
first
,
const
std
::
vector
<
size_t
>&
second
);
void
BuildVariableScope
(
const
framework
::
BlockDesc
&
block
,
VariableScope
*
var_scope
,
bool
use_local_scope
=
true
);
void
BuildOpFuncList
(
const
platform
::
Place
&
place
,
const
framework
::
BlockDesc
&
block
,
const
std
::
set
<
std
::
string
>&
skip_gc_vars
,
std
::
vector
<
OpFuncNode
>*
vec_func_list
,
VariableScope
*
scope
,
bool
use_local_scope
=
true
,
bool
used_for_jit
=
false
);
void
AddFetch
(
const
std
::
vector
<
std
::
string
>&
fetch_names
,
framework
::
BlockDesc
*
block
);
}
// namespace interpreter
}
// namespace framework
...
...
paddle/fluid/framework/new_executor/new_executor_defs.h
浏览文件 @
5303b66b
...
...
@@ -392,7 +392,7 @@ static bool IsCpuOp(const Instruction& instr) {
}
// is supported heterogeneous place
static
bool
IsSupportedHetePlace
(
const
phi
::
Place
&
place
)
{
static
bool
IsSupportedHete
r
Place
(
const
phi
::
Place
&
place
)
{
return
platform
::
is_gpu_place
(
place
)
||
platform
::
is_npu_place
(
place
)
||
platform
::
is_xpu_place
(
place
)
||
platform
::
is_ipu_place
(
place
)
||
platform
::
is_custom_place
(
place
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录