Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
8b2c906a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8b2c906a
编写于
11月 10, 2021
作者:
A
Aurelius84
提交者:
GitHub
11月 10, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Simplify constructor of InterpreterCore (#37072)
* Simplify constructor of InterpreterCore * fix bool * clean code
上级
76d2fd1d
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
100 addition
and
157 deletion
+100
-157
paddle/fluid/framework/new_executor/event_manager.cc
paddle/fluid/framework/new_executor/event_manager.cc
+4
-5
paddle/fluid/framework/new_executor/event_manager.h
paddle/fluid/framework/new_executor/event_manager.h
+4
-7
paddle/fluid/framework/new_executor/interpretercore.cc
paddle/fluid/framework/new_executor/interpretercore.cc
+44
-58
paddle/fluid/framework/new_executor/interpretercore.h
paddle/fluid/framework/new_executor/interpretercore.h
+12
-16
paddle/fluid/framework/new_executor/new_executor_defs.h
paddle/fluid/framework/new_executor/new_executor_defs.h
+6
-0
paddle/fluid/framework/new_executor/profiler.h
paddle/fluid/framework/new_executor/profiler.h
+15
-58
paddle/fluid/framework/new_executor/standalone_executor.cc
paddle/fluid/framework/new_executor/standalone_executor.cc
+5
-6
paddle/fluid/framework/new_executor/standalone_executor.h
paddle/fluid/framework/new_executor/standalone_executor.h
+3
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-5
未找到文件。
paddle/fluid/framework/new_executor/event_manager.cc
浏览文件 @
8b2c906a
...
...
@@ -16,9 +16,8 @@
namespace
paddle
{
namespace
framework
{
void
EventManager
::
WaitEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
)
{
namespace
interpreter
{
void
WaitEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
)
{
// If InterpreterCore in on CPUPlace, do nothing.
if
(
platform
::
is_cpu_place
(
place
))
return
;
...
...
@@ -32,8 +31,7 @@ void EventManager::WaitEvent(const Instruction& instruction,
}
}
void
EventManager
::
RecordEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
)
{
void
RecordEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
)
{
// If InterpreterCore in on CPUPlace, do nothing.
if
(
platform
::
is_cpu_place
(
place
))
return
;
...
...
@@ -43,5 +41,6 @@ void EventManager::RecordEvent(const Instruction& instruction,
}
}
}
// namespace interpreter
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/new_executor/event_manager.h
浏览文件 @
8b2c906a
...
...
@@ -17,14 +17,11 @@
namespace
paddle
{
namespace
framework
{
namespace
interpreter
{
void
RecordEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
);
class
EventManager
{
public:
void
RecordEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
);
void
WaitEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
);
};
void
WaitEvent
(
const
Instruction
&
instruction
,
const
platform
::
Place
&
place
);
}
// namespace interpreter
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/new_executor/interpretercore.cc
浏览文件 @
8b2c906a
...
...
@@ -33,9 +33,9 @@ namespace framework {
// NOTE(Aurelius84): Need a better strategy to determine it.
static
constexpr
size_t
kHostNumThreads
=
4
;
InterpreterCore
::
InterpreterCore
(
const
platform
::
Place
&
place
,
BlockDesc
*
block
,
VariableScope
*
global_scope
,
const
std
::
vector
<
std
::
string
>&
feed_names
)
InterpreterCore
::
InterpreterCore
(
const
platform
::
Place
&
place
,
const
BlockDesc
&
block
,
VariableScope
*
global_scope
)
:
place_
(
place
),
block_
(
block
),
global_scope_
(
global_scope
),
...
...
@@ -45,8 +45,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place, BlockDesc* block,
new
interpreter
::
AsyncWorkQueue
(
kHostNumThreads
,
&
main_thread_blocker_
));
gc_
.
reset
(
new
InterpreterCoreGarbageCollector
());
feed_names_
=
feed_names
;
exception_notifier_
=
main_thread_blocker_
.
RegisterEvent
(
kExceptionCaught
,
[
this
]()
{
return
exception_holder_
.
IsCaught
();
});
...
...
@@ -65,27 +63,12 @@ InterpreterCore::~InterpreterCore() {
}
paddle
::
framework
::
FetchList
InterpreterCore
::
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
)
{
auto
FeedInput
=
[
&
]
{
for
(
size_t
i
=
0
;
i
<
feed_names_
.
size
();
++
i
)
{
auto
*
feed_var
=
global_scope_
->
Var
(
feed_names_
[
i
]);
auto
feed_tensor
=
feed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
feed_tensor
->
ShareDataWith
(
feed_tensors
[
i
]);
feed_tensor
->
set_lod
(
feed_tensors
[
i
].
lod
());
}
};
bool
is_build
=
is_build_
;
Prepare
(
feed_names
,
feed_tensors
,
is_build
);
if
(
is_build_
==
false
)
{
paddle
::
framework
::
interpreter
::
build_variable_scope
(
*
block_
,
global_scope_
);
FeedInput
();
paddle
::
framework
::
interpreter
::
build_op_func_list
(
place_
,
*
block_
,
&
vec_func_list_
,
global_scope_
);
is_build_
=
true
;
// convert vec func_list to graph
Convert
();
}
else
{
FeedInput
();
if
(
is_build
)
{
ExecuteInstructionList
(
vec_instruction_
);
}
...
...
@@ -95,9 +78,9 @@ paddle::framework::FetchList InterpreterCore::Run(
}
void
InterpreterCore
::
Convert
()
{
auto
&
vec_meta_info
=
global_scope_
->
MutableVecMetaInfo
();
auto
var_nums
=
global_scope_
->
VarSize
();
input_var2op_info_
.
resize
(
var_nums
);
vec_meta_info_
.
resize
(
var_nums
);
auto
op_nums
=
vec_func_list_
.
size
();
vec_instruction_
.
reserve
(
op_nums
);
...
...
@@ -136,7 +119,7 @@ void InterpreterCore::Convert() {
gc_check_input_list
.
erase
(
last
,
gc_check_input_list
.
end
());
for
(
auto
var_id
:
gc_check_input_list
)
{
vec_meta_info
_
[
var_id
].
var_ref_count_
++
;
vec_meta_info
[
var_id
].
var_ref_count_
++
;
instr
.
AddGCCheckVar
(
var_id
);
}
}
...
...
@@ -148,7 +131,7 @@ void InterpreterCore::Convert() {
if
(
input_var2op_info_
.
at
(
id
).
size
()
==
0
)
{
// output var not be used by any kernel
vec_instruction_
[
i
].
AddGCCheckVar
(
id
);
vec_meta_info
_
[
id
].
var_ref_count_
++
;
vec_meta_info
[
id
].
var_ref_count_
++
;
}
}
}
...
...
@@ -180,7 +163,7 @@ void InterpreterCore::Convert() {
}
for
(
size_t
i
=
0
;
i
<
vec_instruction_
.
size
();
++
i
)
{
BuildAndCacheInstructionCtx
(
&
vec_instruction_
[
i
]
,
*
global_scope_
,
place_
);
BuildAndCacheInstructionCtx
(
&
vec_instruction_
[
i
]);
}
BuildSkipShareLoDInfo
();
...
...
@@ -248,16 +231,14 @@ void InterpreterCore::BuildInplace() {
}
}
void
InterpreterCore
::
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
,
const
VariableScope
&
var_scope
,
const
platform
::
Place
&
place
)
{
void
InterpreterCore
::
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
)
{
VariableValueMap
ins_map
;
for
(
auto
&
var_name_item
:
instr_node
->
Inputs
())
{
std
::
vector
<
Variable
*>
input_vars
;
input_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
id
:
var_name_item
.
second
)
{
input_vars
.
emplace_back
(
var_scope
.
Var
(
id
));
input_vars
.
emplace_back
(
global_scope_
->
Var
(
id
));
}
ins_map
.
emplace
(
var_name_item
.
first
,
std
::
move
(
input_vars
));
}
...
...
@@ -268,7 +249,7 @@ void InterpreterCore::BuildAndCacheInstructionCtx(
out_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
id
:
var_name_item
.
second
)
{
out_vars
.
emplace_back
(
var_scope
.
Var
(
id
));
out_vars
.
emplace_back
(
global_scope_
->
Var
(
id
));
}
outs_map
.
emplace
(
var_name_item
.
first
,
std
::
move
(
out_vars
));
}
...
...
@@ -359,7 +340,7 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
void
InterpreterCore
::
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
)
{
async_work_queue_
->
PrepareAtomicDeps
(
dependecy_count_
);
async_work_queue_
->
PrepareAtomicVarRef
(
vec_meta_info_
);
async_work_queue_
->
PrepareAtomicVarRef
(
global_scope_
->
VecMetaInfo
()
);
op_run_number_
=
0
;
exception_holder_
.
Clear
();
...
...
@@ -452,7 +433,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
auto
&
instr_node
=
vec_instruction_
.
at
(
instr_id
);
auto
*
op
=
instr_node
.
OpBase
();
platform
::
RecordEvent
instruction_event
(
op
->
Type
());
event_manager_
.
WaitEvent
(
instr_node
,
place_
);
interpreter
::
WaitEvent
(
instr_node
,
place_
);
try
{
RunInstruction
(
instr_node
);
...
...
@@ -479,7 +460,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
return
;
}
event_manager_
.
RecordEvent
(
instr_node
,
place_
);
interpreter
::
RecordEvent
(
instr_node
,
place_
);
op_run_number_
.
fetch_add
(
1
,
std
::
memory_order_relaxed
);
// GC infomation
...
...
@@ -508,11 +489,18 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
}
}
void
InterpreterCore
::
DryRunPrepare
(
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
)
{
void
InterpreterCore
::
Prepare
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
,
bool
prepare_feed
)
{
PADDLE_ENFORCE_EQ
(
feed_names
.
size
(),
feed_tensors
.
size
(),
platform
::
errors
::
PreconditionNotMet
(
"Required feed_names.size() == feed_tensors.size(), "
"but received %d != %d"
,
feed_names
.
size
(),
feed_tensors
.
size
()));
auto
FeedInput
=
[
&
]
{
for
(
size_t
i
=
0
;
i
<
feed_names
_
.
size
();
++
i
)
{
auto
*
feed_var
=
global_scope_
->
FindVar
(
feed_names
_
[
i
]);
for
(
size_t
i
=
0
;
i
<
feed_names
.
size
();
++
i
)
{
auto
*
feed_var
=
global_scope_
->
FindVar
(
feed_names
[
i
]);
PADDLE_ENFORCE_NOT_NULL
(
feed_var
,
platform
::
errors
::
NotFound
(
"feed_var shall not be nullptr."
));
...
...
@@ -522,35 +510,33 @@ void InterpreterCore::DryRunPrepare(
}
};
if
(
is_build_
==
false
)
{
paddle
::
framework
::
interpreter
::
build_variable_scope
(
*
block_
,
global_scope_
);
if
(
!
is_build_
)
{
paddle
::
framework
::
interpreter
::
build_variable_scope
(
block_
,
global_scope_
);
FeedInput
();
paddle
::
framework
::
interpreter
::
build_op_func_list
(
place_
,
*
block_
,
&
vec_func_list_
,
global_scope_
);
place_
,
block_
,
&
vec_func_list_
,
global_scope_
);
is_build_
=
true
;
// convert vec func_list to graph
Convert
();
}
// NOTE: Because feed_tensor will be GC after
// paddle::framework::build_op_func_list, so we should
// call
// FeedInput again.
FeedInput
();
// call FeedInput again.
if
(
prepare_feed
)
FeedInput
();
}
const
CostInfo
&
InterpreterCore
::
DryRun
(
interpreter
::
CostInfo
InterpreterCore
::
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
)
{
DryRunPrepare
(
feed_tensors
);
// DryRun may be called many times.
dry_run_profiler_
.
Reset
();
dry_run_profiler_
.
Start
();
ExecuteInstructionList
(
vec_instruction_
);
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
dry_run_profiler_
.
Pause
();
dry_run_profiler_
.
TotalCUDAAllocatedMemorySize
(
place_
);
return
dry_run_profiler_
.
GetCostInfo
();
Prepare
(
feed_names
,
feed_tensors
,
true
);
interpreter
::
CostInfo
cost_info
;
{
interpreter
::
ProfilerGuard
(
place_
,
&
cost_info
);
ExecuteInstructionList
(
vec_instruction_
);
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
}
return
cost_info
;
}
}
// namespace framework
...
...
paddle/fluid/framework/new_executor/interpretercore.h
浏览文件 @
8b2c906a
...
...
@@ -40,23 +40,23 @@ using AtomicVectorSizeT = std::vector<std::unique_ptr<std::atomic<size_t>>>;
class
InterpreterCore
{
public:
InterpreterCore
(
const
platform
::
Place
&
place
,
BlockDesc
*
block
,
VariableScope
*
global_scope
,
const
std
::
vector
<
std
::
string
>&
feed_names
);
InterpreterCore
(
const
platform
::
Place
&
place
,
const
BlockDesc
&
block
,
VariableScope
*
global_scope
);
~
InterpreterCore
();
paddle
::
framework
::
FetchList
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
const
CostInfo
&
DryRun
(
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
interpreter
::
CostInfo
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
private:
void
Convert
();
void
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
,
const
VariableScope
&
var_scope
,
const
platform
::
Place
&
place
);
void
BuildAndCacheInstructionCtx
(
Instruction
*
instr_node
);
void
BuildInplace
();
...
...
@@ -66,7 +66,9 @@ class InterpreterCore {
void
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
);
void
DryRunPrepare
(
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
void
Prepare
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
,
bool
prepare_feed
);
void
CheckGC
(
const
Instruction
&
instr
);
...
...
@@ -79,22 +81,17 @@ class InterpreterCore {
bool
is_build_
;
const
platform
::
Place
&
place_
;
BlockDesc
*
block_
;
// not owned
const
BlockDesc
&
block_
;
// not owned
VariableScope
*
global_scope_
;
// not owned
std
::
vector
<
paddle
::
framework
::
OpFuncNode
>
vec_func_list_
;
std
::
vector
<
Instruction
>
vec_instruction_
;
// deconstruct before OpFuncNode
InstructionInfo
instruction_info_
;
std
::
vector
<
size_t
>
dependecy_count_
;
std
::
atomic
<
size_t
>
op_run_number_
{
0
};
std
::
vector
<
std
::
vector
<
size_t
>>
input_var2op_info_
;
std
::
vector
<
VariableMetaInfo
>
vec_meta_info_
;
std
::
vector
<
std
::
string
>
feed_names_
;
InterpreterProfiler
dry_run_profiler_
;
StreamAnalyzer
stream_analyzer_
;
EventManager
event_manager_
;
EventsWaiter
main_thread_blocker_
;
std
::
unique_ptr
<
interpreter
::
AsyncWorkQueue
>
async_work_queue_
;
details
::
ExceptionHolder
exception_holder_
;
...
...
@@ -102,7 +99,6 @@ class InterpreterCore {
std
::
unique_ptr
<
InterpreterCoreGarbageCollector
>
gc_
;
std
::
vector
<
paddle
::
platform
::
DeviceEvent
>
gc_event_
;
std
::
atomic
<
size_t
>
op_run_number_
{
0
};
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/new_executor/new_executor_defs.h
浏览文件 @
8b2c906a
...
...
@@ -607,6 +607,12 @@ class VariableScope : public ScopeBase {
platform
::
errors
::
NotFound
(
"%s not in VariableScope."
,
name
));
}
std
::
vector
<
VariableMetaInfo
>&
MutableVecMetaInfo
()
{
return
vec_meta_info_
;
}
const
std
::
vector
<
VariableMetaInfo
>&
VecMetaInfo
()
const
{
return
vec_meta_info_
;
}
private:
std
::
vector
<
Variable
*>
var_list_
;
std
::
map
<
std
::
string
,
int
>
name2id_
;
...
...
paddle/fluid/framework/new_executor/profiler.h
浏览文件 @
8b2c906a
...
...
@@ -20,84 +20,41 @@
namespace
paddle
{
namespace
framework
{
static
void
GetTensors
(
Variable
*
var
,
std
::
unordered_set
<
Tensor
*>*
tensor_set
)
{
if
(
var
->
IsType
<
LoDTensor
>
()
&&
var
->
Get
<
LoDTensor
>
().
IsInitialized
())
{
tensor_set
->
insert
(
var
->
GetMutable
<
LoDTensor
>
());
}
else
if
(
var
->
IsType
<
SelectedRows
>
()
&&
var
->
Get
<
SelectedRows
>
().
value
().
IsInitialized
())
{
tensor_set
->
insert
(
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
());
}
else
if
(
var
->
IsType
<
LoDTensorArray
>
())
{
auto
*
tensor_arr
=
var
->
GetMutable
<
LoDTensorArray
>
();
for
(
auto
&
t
:
*
tensor_arr
)
{
if
(
t
.
IsInitialized
())
{
tensor_set
->
insert
(
&
t
);
}
}
}
}
static
std
::
pair
<
size_t
,
size_t
>
GetTensorMemorySize
(
const
std
::
vector
<
Variable
*>&
var_list
)
{
std
::
unordered_set
<
Tensor
*>
tensor_set
;
for
(
auto
*
var
:
var_list
)
{
GetTensors
(
var
,
&
tensor_set
);
}
size_t
host_memory_bytes
=
0
;
size_t
device_memory_bytes
=
0
;
std
::
unordered_set
<
memory
::
Allocation
*>
allocation_set
;
for
(
auto
*
tensor
:
tensor_set
)
{
auto
allocation
=
tensor
->
Holder
().
get
();
if
(
!
allocation_set
.
count
(
allocation
))
{
allocation_set
.
insert
(
allocation
);
if
(
platform
::
is_cuda_pinned_place
(
tensor
->
place
())
||
platform
::
is_cpu_place
(
tensor
->
place
()))
{
VLOG
(
3
)
<<
"found host memory : "
<<
allocation
->
size
();
host_memory_bytes
+=
allocation
->
size
();
}
else
{
VLOG
(
3
)
<<
"found device memory : "
<<
allocation
->
size
();
device_memory_bytes
+=
allocation
->
size
();
}
}
}
return
{
host_memory_bytes
,
device_memory_bytes
};
}
namespace
interpreter
{
struct
CostInfo
{
double
total_time
{
0.
};
// ms
size_t
device_memory_bytes
{
0
};
// total allocated memory size
};
class
InterpreterProfiler
{
class
ProfilerGuard
{
public:
void
Start
()
{
timer_
.
Start
();
}
void
Pause
()
{
timer_
.
Pause
();
cost_info_
.
total_time
+=
timer_
.
ElapsedMS
();
ProfilerGuard
(
const
platform
::
Place
&
place
,
CostInfo
*
cost_info
)
:
place_
(
place
),
cost_info_
(
cost_info
)
{
timer_
.
Start
();
}
void
Reset
()
{
timer_
.
Reset
();
cost_info_
.
total_time
=
0.
;
cost_info_
.
device_memory_bytes
=
0
;
~
ProfilerGuard
()
{
timer_
.
Pause
();
cost_info_
->
total_time
+=
timer_
.
ElapsedMS
()
;
TotalCUDAAllocatedMemorySize
(
place_
)
;
}
private:
void
TotalCUDAAllocatedMemorySize
(
const
platform
::
Place
&
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
cuda_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
);
cost_info_
.
device_memory_bytes
=
cost_info_
->
device_memory_bytes
=
platform
::
RecordedCudaMallocSize
(
cuda_place
.
device
);
#endif
}
}
const
CostInfo
&
GetCostInfo
()
const
{
return
cost_info_
;
}
private:
const
platform
::
Place
&
place_
;
CostInfo
*
cost_info_
;
platform
::
Timer
timer_
;
CostInfo
cost_info_
;
};
}
// namespace interpreter
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/new_executor/standalone_executor.cc
浏览文件 @
8b2c906a
...
...
@@ -51,16 +51,15 @@ paddle::framework::FetchList StandaloneExecutor::Run(
const
std
::
vector
<
std
::
string
>&
fetch_names
)
{
auto
core
=
GetInterpreterCore
(
feed_names
,
fetch_names
);
return
core
->
Run
(
feed_tensors
);
return
core
->
Run
(
feed_
names
,
feed_
tensors
);
}
const
CostInfo
&
StandaloneExecutor
::
DryRun
(
framework
::
interpreter
::
CostInfo
StandaloneExecutor
::
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
)
{
auto
core
=
GetInterpreterCore
(
feed_names
,
{});
auto
&
cost_info
=
core
->
DryRun
(
feed_tensors
);
return
cost_info
;
return
core
->
DryRun
(
feed_names
,
feed_tensors
);
}
void
StandaloneExecutor
::
BuildVariableOuterScope
(
...
...
@@ -102,8 +101,8 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
auto
*
block
=
new_prog
->
MutableBlock
(
0
);
interpreter
::
add_fetch
(
fetch_names
,
block
);
auto
core
=
std
::
make_shared
<
InterpreterCore
>
(
place_
,
block
,
&
global_scope_
,
feed_names
);
auto
core
=
std
::
make_shared
<
InterpreterCore
>
(
place_
,
*
block
,
&
global_scope_
);
programs_
.
emplace
(
oss
.
str
(),
new_prog
);
interpretercores_
.
emplace
(
oss
.
str
(),
core
);
return
core
;
...
...
paddle/fluid/framework/new_executor/standalone_executor.h
浏览文件 @
8b2c906a
...
...
@@ -45,8 +45,9 @@ class StandaloneExecutor : public ExecutorBase {
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
,
const
std
::
vector
<
std
::
string
>&
fetch_names
);
const
CostInfo
&
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
framework
::
interpreter
::
CostInfo
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
);
private:
void
BuildVariableOuterScope
(
const
framework
::
ProgramDesc
&
pdesc
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
8b2c906a
...
...
@@ -2069,11 +2069,13 @@ All parameter, weight, gradient are variables in Paddle.
fetch_vars
);
});
py
::
class_
<
framework
::
CostInfo
>
(
m
,
"CostInfo"
)
py
::
class_
<
framework
::
interpreter
::
CostInfo
>
(
m
,
"CostInfo"
)
.
def
(
py
::
init
<>
())
.
def
(
"total_time"
,
[](
CostInfo
&
self
)
{
return
self
.
total_time
;
})
.
def
(
"device_memory_bytes"
,
[](
CostInfo
&
self
)
{
return
self
.
device_memory_bytes
;
});
.
def
(
"total_time"
,
[](
interpreter
::
CostInfo
&
self
)
{
return
self
.
total_time
;
})
.
def
(
"device_memory_bytes"
,
[](
interpreter
::
CostInfo
&
self
)
{
return
self
.
device_memory_bytes
;
});
py
::
class_
<
framework
::
StandaloneExecutor
>
(
m
,
"StandaloneExecutor"
)
.
def
(
py
::
init
<
const
platform
::
Place
&
,
const
ProgramDesc
&
,
...
...
@@ -2134,7 +2136,7 @@ All parameter, weight, gradient are variables in Paddle.
feed_tensors
.
push_back
(
t
);
}
CostInfo
cost_info
;
framework
::
interpreter
::
CostInfo
cost_info
;
{
pybind11
::
gil_scoped_release
release
;
cost_info
=
self
.
DryRun
(
feed_names
,
feed_tensors
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录