BaiXuePrincess/Paddle (fork of PaddlePaddle/Paddle)

Commit 8b2c906a (unverified)
Authored by Aurelius84 on Nov 10, 2021; committed by GitHub on Nov 10, 2021
Parent: 76d2fd1d

Simplify constructor of InterpreterCore (#37072)

* Simplify constructor of InterpreterCore
* fix bool
* clean code
Changes: 9 changed files with 100 additions and 157 deletions (+100 −157).
paddle/fluid/framework/new_executor/event_manager.cc        +4  −5
paddle/fluid/framework/new_executor/event_manager.h         +4  −7
paddle/fluid/framework/new_executor/interpretercore.cc      +44 −58
paddle/fluid/framework/new_executor/interpretercore.h       +12 −16
paddle/fluid/framework/new_executor/new_executor_defs.h     +6  −0
paddle/fluid/framework/new_executor/profiler.h              +15 −58
paddle/fluid/framework/new_executor/standalone_executor.cc  +5  −6
paddle/fluid/framework/new_executor/standalone_executor.h   +3  −2
paddle/fluid/pybind/pybind.cc                               +7  −5
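Taken together, these diffs move feed binding from construction time to call time: the feed_names constructor parameter and the feed_names_ member disappear, and Run/DryRun take the names alongside the tensors. A before/after sketch of the call sites, reconstructed from the signatures below (Paddle-internal types, so not compilable standalone):

    // Before: one InterpreterCore per (block, scope, feed_names) triple.
    //   InterpreterCore core(place, block_ptr, &global_scope, feed_names);
    //   auto fetch_list = core.Run(feed_tensors);
    //
    // After: the core binds only a block and a scope; feeds travel per call.
    //   InterpreterCore core(place, *block_ptr, &global_scope);
    //   auto fetch_list = core.Run(feed_names, feed_tensors);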
paddle/fluid/framework/new_executor/event_manager.cc

@@ -16,9 +16,8 @@
 namespace paddle {
 namespace framework {
+namespace interpreter {

-void EventManager::WaitEvent(const Instruction& instruction,
-                             const platform::Place& place) {
+void WaitEvent(const Instruction& instruction, const platform::Place& place) {
   // If InterpreterCore in on CPUPlace, do nothing.
   if (platform::is_cpu_place(place)) return;

@@ -32,8 +31,7 @@ void EventManager::WaitEvent(const Instruction& instruction,
   }
 }

-void EventManager::RecordEvent(const Instruction& instruction,
-                               const platform::Place& place) {
+void RecordEvent(const Instruction& instruction, const platform::Place& place) {
   // If InterpreterCore in on CPUPlace, do nothing.
   if (platform::is_cpu_place(place)) return;

@@ -43,5 +41,6 @@ void EventManager::RecordEvent(const Instruction& instruction,
   }
 }

+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/new_executor/event_manager.h

@@ -17,14 +17,11 @@
 namespace paddle {
 namespace framework {
+namespace interpreter {
+void RecordEvent(const Instruction& instruction, const platform::Place& place);

-class EventManager {
- public:
-  void RecordEvent(const Instruction& instruction,
-                   const platform::Place& place);
-
-  void WaitEvent(const Instruction& instruction, const platform::Place& place);
-};
+void WaitEvent(const Instruction& instruction, const platform::Place& place);

+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
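EventManager carried no state, so the header now exposes WaitEvent/RecordEvent as free functions under paddle::framework::interpreter. A self-contained toy (names invented here) showing why the free-function form is the simpler API for a stateless utility:

    #include <iostream>

    // Stateless method-only class: callers must materialize a dummy object.
    struct GreeterClass {
      void Greet(const char* name) const { std::cout << "hi " << name << "\n"; }
    };

    // Equivalent free function in a namespace: no object, same behavior.
    namespace greeting {
    inline void Greet(const char* name) { std::cout << "hi " << name << "\n"; }
    }  // namespace greeting

    int main() {
      GreeterClass g;
      g.Greet("world");          // before: member call on an empty object
      greeting::Greet("world");  // after: plain qualified call, as in this diff
      return 0;
    }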
paddle/fluid/framework/new_executor/interpretercore.cc

@@ -33,9 +33,9 @@ namespace framework {
 // NOTE(Aurelius84): Need a better strategy to determine it.
 static constexpr size_t kHostNumThreads = 4;

-InterpreterCore::InterpreterCore(const platform::Place& place, BlockDesc* block,
-                                 VariableScope* global_scope,
-                                 const std::vector<std::string>& feed_names)
+InterpreterCore::InterpreterCore(const platform::Place& place,
+                                 const BlockDesc& block,
+                                 VariableScope* global_scope)
     : place_(place),
       block_(block),
       global_scope_(global_scope),

@@ -45,8 +45,6 @@ InterpreterCore::InterpreterCore(const platform::Place& place, BlockDesc* block,
       new interpreter::AsyncWorkQueue(kHostNumThreads, &main_thread_blocker_));
   gc_.reset(new InterpreterCoreGarbageCollector());

-  feed_names_ = feed_names;
-
   exception_notifier_ = main_thread_blocker_.RegisterEvent(
       kExceptionCaught, [this]() { return exception_holder_.IsCaught(); });

@@ -65,27 +63,12 @@ InterpreterCore::~InterpreterCore() {
 }

 paddle::framework::FetchList InterpreterCore::Run(
+    const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-  auto FeedInput = [&] {
-    for (size_t i = 0; i < feed_names_.size(); ++i) {
-      auto* feed_var = global_scope_->Var(feed_names_[i]);
-      auto feed_tensor = feed_var->GetMutable<framework::LoDTensor>();
-      feed_tensor->ShareDataWith(feed_tensors[i]);
-      feed_tensor->set_lod(feed_tensors[i].lod());
-    }
-  };
+  bool is_build = is_build_;
+  Prepare(feed_names, feed_tensors, is_build);

-  if (is_build_ == false) {
-    paddle::framework::interpreter::build_variable_scope(*block_, global_scope_);
-    FeedInput();
-    paddle::framework::interpreter::build_op_func_list(
-        place_, *block_, &vec_func_list_, global_scope_);
-    is_build_ = true;
-    // convert vec func_list to graph
-    Convert();
-  } else {
-    FeedInput();
+  if (is_build) {
     ExecuteInstructionList(vec_instruction_);
   }

@@ -95,9 +78,9 @@ paddle::framework::FetchList InterpreterCore::Run(
 }

 void InterpreterCore::Convert() {
+  auto& vec_meta_info = global_scope_->MutableVecMetaInfo();
   auto var_nums = global_scope_->VarSize();
   input_var2op_info_.resize(var_nums);
-  vec_meta_info_.resize(var_nums);

   auto op_nums = vec_func_list_.size();
   vec_instruction_.reserve(op_nums);

@@ -136,7 +119,7 @@ void InterpreterCore::Convert() {
     gc_check_input_list.erase(last, gc_check_input_list.end());

     for (auto var_id : gc_check_input_list) {
-      vec_meta_info_[var_id].var_ref_count_++;
+      vec_meta_info[var_id].var_ref_count_++;
       instr.AddGCCheckVar(var_id);
     }
   }

@@ -148,7 +131,7 @@ void InterpreterCore::Convert() {
       if (input_var2op_info_.at(id).size() == 0) {
         // output var not be used by any kernel
         vec_instruction_[i].AddGCCheckVar(id);
-        vec_meta_info_[id].var_ref_count_++;
+        vec_meta_info[id].var_ref_count_++;
       }
     }
   }

@@ -180,7 +163,7 @@ void InterpreterCore::Convert() {
   }

   for (size_t i = 0; i < vec_instruction_.size(); ++i) {
-    BuildAndCacheInstructionCtx(&vec_instruction_[i], *global_scope_, place_);
+    BuildAndCacheInstructionCtx(&vec_instruction_[i]);
   }

   BuildSkipShareLoDInfo();

@@ -248,16 +231,14 @@ void InterpreterCore::BuildInplace() {
   }
 }

-void InterpreterCore::BuildAndCacheInstructionCtx(
-    Instruction* instr_node, const VariableScope& var_scope,
-    const platform::Place& place) {
+void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* instr_node) {
   VariableValueMap ins_map;
   for (auto& var_name_item : instr_node->Inputs()) {
     std::vector<Variable*> input_vars;

     input_vars.reserve(var_name_item.second.size());
     for (auto& id : var_name_item.second) {
-      input_vars.emplace_back(var_scope.Var(id));
+      input_vars.emplace_back(global_scope_->Var(id));
     }
     ins_map.emplace(var_name_item.first, std::move(input_vars));
   }

@@ -268,7 +249,7 @@ void InterpreterCore::BuildAndCacheInstructionCtx(
     out_vars.reserve(var_name_item.second.size());
     for (auto& id : var_name_item.second) {
-      out_vars.emplace_back(var_scope.Var(id));
+      out_vars.emplace_back(global_scope_->Var(id));
     }
     outs_map.emplace(var_name_item.first, std::move(out_vars));
   }

@@ -359,7 +340,7 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
 void InterpreterCore::ExecuteInstructionList(
     const std::vector<Instruction>& vec_instr) {
   async_work_queue_->PrepareAtomicDeps(dependecy_count_);
-  async_work_queue_->PrepareAtomicVarRef(vec_meta_info_);
+  async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());
   op_run_number_ = 0;

   exception_holder_.Clear();

@@ -452,7 +433,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
   auto& instr_node = vec_instruction_.at(instr_id);
   auto* op = instr_node.OpBase();
   platform::RecordEvent instruction_event(op->Type());
-  event_manager_.WaitEvent(instr_node, place_);
+  interpreter::WaitEvent(instr_node, place_);

   try {
     RunInstruction(instr_node);

@@ -479,7 +460,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
     return;
   }

-  event_manager_.RecordEvent(instr_node, place_);
+  interpreter::RecordEvent(instr_node, place_);
   op_run_number_.fetch_add(1, std::memory_order_relaxed);

   // GC infomation

@@ -508,11 +489,18 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
   }
 }

-void InterpreterCore::DryRunPrepare(
-    const std::vector<framework::LoDTensor>& feed_tensors) {
+void InterpreterCore::Prepare(
+    const std::vector<std::string>& feed_names,
+    const std::vector<framework::LoDTensor>& feed_tensors, bool prepare_feed) {
+  PADDLE_ENFORCE_EQ(
+      feed_names.size(), feed_tensors.size(),
+      platform::errors::PreconditionNotMet(
+          "Required feed_names.size() == feed_tensors.size(), "
+          "but received %d != %d", feed_names.size(), feed_tensors.size()));
+
   auto FeedInput = [&] {
-    for (size_t i = 0; i < feed_names_.size(); ++i) {
-      auto* feed_var = global_scope_->FindVar(feed_names_[i]);
+    for (size_t i = 0; i < feed_names.size(); ++i) {
+      auto* feed_var = global_scope_->FindVar(feed_names[i]);
       PADDLE_ENFORCE_NOT_NULL(feed_var, platform::errors::NotFound(
                                             "feed_var shall not be nullptr."));

@@ -522,35 +510,33 @@ void InterpreterCore::DryRunPrepare(
     }
   };

-  if (is_build_ == false) {
-    paddle::framework::interpreter::build_variable_scope(*block_, global_scope_);
+  if (!is_build_) {
+    paddle::framework::interpreter::build_variable_scope(block_, global_scope_);
     FeedInput();
     paddle::framework::interpreter::build_op_func_list(
-        place_, *block_, &vec_func_list_, global_scope_);
+        place_, block_, &vec_func_list_, global_scope_);
     is_build_ = true;
     // convert vec func_list to graph
     Convert();
   }
   // NOTE: Because feed_tensor will be GC after
   // paddle::framework::build_op_func_list, so we should
-  // call
-  // FeedInput again.
-  FeedInput();
+  // call FeedInput again.
+  if (prepare_feed) FeedInput();
 }

-const CostInfo& InterpreterCore::DryRun(
+interpreter::CostInfo InterpreterCore::DryRun(
+    const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
-  DryRunPrepare(feed_tensors);
-  // DryRun may be called many times.
-  dry_run_profiler_.Reset();
-  dry_run_profiler_.Start();
-  ExecuteInstructionList(vec_instruction_);
-  platform::DeviceContextPool::Instance().Get(place_)->Wait();
-
-  dry_run_profiler_.Pause();
-  dry_run_profiler_.TotalCUDAAllocatedMemorySize(place_);
-  return dry_run_profiler_.GetCostInfo();
+  Prepare(feed_names, feed_tensors, true);
+  interpreter::CostInfo cost_info;
+  {
+    interpreter::ProfilerGuard(place_, &cost_info);
+    ExecuteInstructionList(vec_instruction_);
+    platform::DeviceContextPool::Instance().Get(place_)->Wait();
+  }
+  return cost_info;
 }

 }  // namespace framework
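The Run/Prepare pair above encodes a build-on-first-call protocol: the first call traces the block (and build_op_func_list executes the ops once as a side effect), so ExecuteInstructionList only runs on calls where the core was already built, and prepare_feed re-binds feeds only in that case. A toy model of just the control flow (invented names, not Paddle code):

    #include <cassert>
    #include <vector>

    class ToyCore {
     public:
      // Mirrors InterpreterCore::Run(feed_names, feed_tensors).
      void Run(const std::vector<int>& feeds) {
        bool is_build = is_build_;      // snapshot before Prepare flips it
        Prepare(feeds, /*prepare_feed=*/is_build);
        if (is_build) Execute();        // first call already ran ops while building
      }

      int runs_seen() const { return executed_; }

     private:
      void Prepare(const std::vector<int>& feeds, bool prepare_feed) {
        if (!is_build_) {
          Feed(feeds);   // feeds are needed (and consumed) while building
          Build();       // stands in for build_op_func_list + Convert
          is_build_ = true;
        }
        if (prepare_feed) Feed(feeds);  // re-bind on every later call
      }
      void Feed(const std::vector<int>& feeds) { bound_ = feeds; }
      void Build() { ++executed_; }     // building executes the ops once
      void Execute() { ++executed_; }

      bool is_build_ = false;
      std::vector<int> bound_;
      int executed_ = 0;
    };

    int main() {
      ToyCore core;
      core.Run({1, 2});  // builds, executes once during the build
      core.Run({3, 4});  // fast path: feed + execute
      assert(core.runs_seen() == 2);
      return 0;
    }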
paddle/fluid/framework/new_executor/interpretercore.h

@@ -40,23 +40,23 @@ using AtomicVectorSizeT = std::vector<std::unique_ptr<std::atomic<size_t>>>;
 class InterpreterCore {
  public:
-  InterpreterCore(const platform::Place& place, BlockDesc* block,
-                  VariableScope* global_scope,
-                  const std::vector<std::string>& feed_names);
+  InterpreterCore(const platform::Place& place, const BlockDesc& block,
+                  VariableScope* global_scope);

   ~InterpreterCore();

   paddle::framework::FetchList Run(
+      const std::vector<std::string>& feed_names,
       const std::vector<framework::LoDTensor>& feed_tensors);

-  const CostInfo& DryRun(const std::vector<framework::LoDTensor>& feed_tensors);
+  interpreter::CostInfo DryRun(
+      const std::vector<std::string>& feed_names,
+      const std::vector<framework::LoDTensor>& feed_tensors);

  private:
   void Convert();

-  void BuildAndCacheInstructionCtx(Instruction* instr_node,
-                                   const VariableScope& var_scope,
-                                   const platform::Place& place);
+  void BuildAndCacheInstructionCtx(Instruction* instr_node);

   void BuildInplace();

@@ -66,7 +66,9 @@ class InterpreterCore {
   void ExecuteInstructionList(const std::vector<Instruction>& vec_instr);

-  void DryRunPrepare(const std::vector<framework::LoDTensor>& feed_tensors);
+  void Prepare(const std::vector<std::string>& feed_names,
+               const std::vector<framework::LoDTensor>& feed_tensors,
+               bool prepare_feed);

   void CheckGC(const Instruction& instr);

@@ -79,22 +81,17 @@
   bool is_build_;

   const platform::Place& place_;
-  BlockDesc* block_;             // not owned
+  const BlockDesc& block_;       // not owned
   VariableScope* global_scope_;  // not owned

   std::vector<paddle::framework::OpFuncNode> vec_func_list_;
   std::vector<Instruction> vec_instruction_;  // deconstruct before OpFuncNode

-  InstructionInfo instruction_info_;
   std::vector<size_t> dependecy_count_;
+  std::atomic<size_t> op_run_number_{0};
   std::vector<std::vector<size_t>> input_var2op_info_;
-  std::vector<VariableMetaInfo> vec_meta_info_;
-
-  std::vector<std::string> feed_names_;
-
-  InterpreterProfiler dry_run_profiler_;
   StreamAnalyzer stream_analyzer_;
-  EventManager event_manager_;
   EventsWaiter main_thread_blocker_;
   std::unique_ptr<interpreter::AsyncWorkQueue> async_work_queue_;
   details::ExceptionHolder exception_holder_;

@@ -102,7 +99,6 @@ class InterpreterCore {
   std::unique_ptr<InterpreterCoreGarbageCollector> gc_;
   std::vector<paddle::platform::DeviceEvent> gc_event_;
-  std::atomic<size_t> op_run_number_{0};
 };

 }  // namespace framework
 }  // namespace paddle
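Note the member change from BlockDesc* to const BlockDesc&: the reference states in the type that the block is non-null and read-only for the core's lifetime. A minimal sketch with stand-in types:

    #include <cassert>

    struct BlockDesc {};  // stand-in for the real class

    class CoreWithPtr {
     public:
      explicit CoreWithPtr(BlockDesc* block) : block_(block) {
        assert(block_ != nullptr);  // nullability must be checked at runtime
      }
      BlockDesc* block() const { return block_; }
     private:
      BlockDesc* block_;  // not owned, but nullable in principle
    };

    class CoreWithRef {
     public:
      explicit CoreWithRef(const BlockDesc& block) : block_(block) {}
      const BlockDesc& block() const { return block_; }
     private:
      const BlockDesc& block_;  // not owned; non-null and const by construction
    };

    int main() {
      BlockDesc b;
      CoreWithPtr with_ptr(&b);
      CoreWithRef with_ref(b);
      return 0;
    }

The trade-off is that a reference member makes the class non-copy-assignable, which suits a core bound to one block for its whole lifetime.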
paddle/fluid/framework/new_executor/new_executor_defs.h

@@ -607,6 +607,12 @@ class VariableScope : public ScopeBase {
                        platform::errors::NotFound("%s not in VariableScope.", name));
   }

+  std::vector<VariableMetaInfo>& MutableVecMetaInfo() { return vec_meta_info_; }
+
+  const std::vector<VariableMetaInfo>& VecMetaInfo() const {
+    return vec_meta_info_;
+  }
+
  private:
   std::vector<Variable*> var_list_;
   std::map<std::string, int> name2id_;
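With vec_meta_info_ gone from InterpreterCore, VariableScope becomes the single owner of per-variable metadata and exposes the usual accessor pair: mutable for the build phase (Convert resizes and bumps ref counts) and const for execution (PrepareAtomicVarRef only reads). A self-contained sketch of that usage:

    #include <cassert>
    #include <vector>

    struct VariableMetaInfo {
      int var_ref_count_ = 0;
    };

    class VariableScope {
     public:
      std::vector<VariableMetaInfo>& MutableVecMetaInfo() { return vec_meta_info_; }
      const std::vector<VariableMetaInfo>& VecMetaInfo() const {
        return vec_meta_info_;
      }
     private:
      std::vector<VariableMetaInfo> vec_meta_info_;
    };

    int main() {
      VariableScope scope;
      auto& meta = scope.MutableVecMetaInfo();  // build phase: Convert()
      meta.resize(4);
      meta[0].var_ref_count_++;
      const auto& ro = scope.VecMetaInfo();     // run phase: PrepareAtomicVarRef()
      assert(ro.size() == 4 && ro[0].var_ref_count_ == 1);
      return 0;
    }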
paddle/fluid/framework/new_executor/profiler.h

@@ -20,84 +20,41 @@
 namespace paddle {
 namespace framework {

-static void GetTensors(Variable* var, std::unordered_set<Tensor*>* tensor_set) {
-  if (var->IsType<LoDTensor>() && var->Get<LoDTensor>().IsInitialized()) {
-    tensor_set->insert(var->GetMutable<LoDTensor>());
-  } else if (var->IsType<SelectedRows>() &&
-             var->Get<SelectedRows>().value().IsInitialized()) {
-    tensor_set->insert(var->GetMutable<SelectedRows>()->mutable_value());
-  } else if (var->IsType<LoDTensorArray>()) {
-    auto* tensor_arr = var->GetMutable<LoDTensorArray>();
-    for (auto& t : *tensor_arr) {
-      if (t.IsInitialized()) {
-        tensor_set->insert(&t);
-      }
-    }
-  }
-}
-
-static std::pair<size_t, size_t> GetTensorMemorySize(
-    const std::vector<Variable*>& var_list) {
-  std::unordered_set<Tensor*> tensor_set;
-  for (auto* var : var_list) {
-    GetTensors(var, &tensor_set);
-  }
-  size_t host_memory_bytes = 0;
-  size_t device_memory_bytes = 0;
-  std::unordered_set<memory::Allocation*> allocation_set;
-  for (auto* tensor : tensor_set) {
-    auto allocation = tensor->Holder().get();
-    if (!allocation_set.count(allocation)) {
-      allocation_set.insert(allocation);
-      if (platform::is_cuda_pinned_place(tensor->place()) ||
-          platform::is_cpu_place(tensor->place())) {
-        VLOG(3) << "found host memory : " << allocation->size();
-        host_memory_bytes += allocation->size();
-      } else {
-        VLOG(3) << "found device memory : " << allocation->size();
-        device_memory_bytes += allocation->size();
-      }
-    }
-  }
-  return {host_memory_bytes, device_memory_bytes};
-}
+namespace interpreter {

 struct CostInfo {
   double total_time{0.};          // ms
   size_t device_memory_bytes{0};  // total allocated memory size
 };

-class InterpreterProfiler {
+class ProfilerGuard {
  public:
-  void Start() { timer_.Start(); }
-
-  void Pause() {
-    timer_.Pause();
-    cost_info_.total_time += timer_.ElapsedMS();
-  }
-
-  void Reset() {
-    timer_.Reset();
-    cost_info_.total_time = 0.;
-    cost_info_.device_memory_bytes = 0;
-  }
+  ProfilerGuard(const platform::Place& place, CostInfo* cost_info)
+      : place_(place), cost_info_(cost_info) {
+    timer_.Start();
+  }
+
+  ~ProfilerGuard() {
+    timer_.Pause();
+    cost_info_->total_time += timer_.ElapsedMS();
+    TotalCUDAAllocatedMemorySize(place_);
+  }

+ private:
   void TotalCUDAAllocatedMemorySize(const platform::Place& place) {
     if (platform::is_gpu_place(place)) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       auto cuda_place = BOOST_GET_CONST(platform::CUDAPlace, place);
-      cost_info_.device_memory_bytes =
+      cost_info_->device_memory_bytes =
           platform::RecordedCudaMallocSize(cuda_place.device);
 #endif
     }
   }

-  const CostInfo& GetCostInfo() const { return cost_info_; }
-
- private:
+  const platform::Place& place_;
+  CostInfo* cost_info_;
   platform::Timer timer_;
-  CostInfo cost_info_;
 };

+}  // namespace interpreter
 }  // namespace framework
 }  // namespace paddle
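The Start/Pause/Reset protocol of InterpreterProfiler collapses into an RAII guard: construction starts the timer, destruction accumulates elapsed time and memory stats into a caller-owned CostInfo. A self-contained sketch of the same pattern with std::chrono standing in for platform::Timer:

    #include <chrono>
    #include <iostream>

    struct CostInfo {
      double total_time{0.};  // ms, as in profiler.h
    };

    class TimerGuard {
     public:
      explicit TimerGuard(CostInfo* cost_info)
          : cost_info_(cost_info), start_(std::chrono::steady_clock::now()) {}
      ~TimerGuard() {
        auto end = std::chrono::steady_clock::now();
        cost_info_->total_time +=
            std::chrono::duration<double, std::milli>(end - start_).count();
      }
     private:
      CostInfo* cost_info_;
      std::chrono::steady_clock::time_point start_;
    };

    int main() {
      CostInfo info;
      {
        TimerGuard guard(&info);  // named object: lives to the end of the scope
        volatile long sink = 0;
        for (long i = 0; i < 1000000; ++i) sink += i;  // the work being timed
      }
      std::cout << info.total_time << " ms\n";
      return 0;
    }

One thing the pattern requires is a named guard object: a statement like interpreter::ProfilerGuard(place_, &cost_info); constructs a temporary that is destroyed at the end of that statement, before the work it was meant to time.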
paddle/fluid/framework/new_executor/standalone_executor.cc

@@ -51,16 +51,15 @@ paddle::framework::FetchList StandaloneExecutor::Run(
     const std::vector<std::string>& fetch_names) {
   auto core = GetInterpreterCore(feed_names, fetch_names);

-  return core->Run(feed_tensors);
+  return core->Run(feed_names, feed_tensors);
 }

-const CostInfo& StandaloneExecutor::DryRun(
+framework::interpreter::CostInfo StandaloneExecutor::DryRun(
     const std::vector<std::string>& feed_names,
     const std::vector<framework::LoDTensor>& feed_tensors) {
   auto core = GetInterpreterCore(feed_names, {});

-  auto& cost_info = core->DryRun(feed_tensors);
-  return cost_info;
+  return core->DryRun(feed_names, feed_tensors);
 }

 void StandaloneExecutor::BuildVariableOuterScope(

@@ -102,8 +101,8 @@ std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
   auto* block = new_prog->MutableBlock(0);
   interpreter::add_fetch(fetch_names, block);

-  auto core = std::make_shared<InterpreterCore>(place_, block, &global_scope_,
-                                                feed_names);
+  auto core = std::make_shared<InterpreterCore>(place_, *block, &global_scope_);
   programs_.emplace(oss.str(), new_prog);
   interpretercores_.emplace(oss.str(), core);
   return core;
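For context, GetInterpreterCore memoizes cores in interpretercores_ keyed by a string built from the feed and fetch names (the oss.str() above), so each distinct feed/fetch signature builds its program and core once. A toy version of that lookup (key format assumed here for illustration):

    #include <cassert>
    #include <map>
    #include <memory>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Core {};  // stand-in for InterpreterCore

    std::shared_ptr<Core> GetCore(
        std::map<std::string, std::shared_ptr<Core>>* cache,
        const std::vector<std::string>& feed_names,
        const std::vector<std::string>& fetch_names) {
      std::ostringstream oss;  // join the names into a cache key
      for (const auto& n : feed_names) oss << n << ',';
      oss << '|';
      for (const auto& n : fetch_names) oss << n << ',';
      auto it = cache->find(oss.str());
      if (it != cache->end()) return it->second;  // hit: reuse the built core
      auto core = std::make_shared<Core>();       // miss: build once
      cache->emplace(oss.str(), core);
      return core;
    }

    int main() {
      std::map<std::string, std::shared_ptr<Core>> cache;
      auto a = GetCore(&cache, {"x"}, {"out"});
      auto b = GetCore(&cache, {"x"}, {"out"});
      assert(a == b && cache.size() == 1);  // same signature, same core
      return 0;
    }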
paddle/fluid/framework/new_executor/standalone_executor.h

@@ -45,8 +45,9 @@ class StandaloneExecutor : public ExecutorBase {
       const std::vector<framework::LoDTensor>& feed_tensors,
       const std::vector<std::string>& fetch_names);

-  const CostInfo& DryRun(const std::vector<std::string>& feed_names,
-                         const std::vector<framework::LoDTensor>& feed_tensors);
+  framework::interpreter::CostInfo DryRun(
+      const std::vector<std::string>& feed_names,
+      const std::vector<framework::LoDTensor>& feed_tensors);

  private:
   void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
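DryRun now returns framework::interpreter::CostInfo by value rather than const CostInfo&. The old reference aliased the member profiler's state, which every later DryRun overwrote; with the profiler member gone, the result is a local and must be returned by value anyway. A minimal illustration of the aliasing problem the change removes:

    #include <cassert>

    struct CostInfo {
      double total_time{0.};
    };

    class RefStyleExecutor {
     public:
      const CostInfo& DryRun(double ms) {
        cost_info_.total_time = ms;  // every call rewrites the shared state
        return cost_info_;
      }
     private:
      CostInfo cost_info_;
    };

    class ValueStyleExecutor {
     public:
      CostInfo DryRun(double ms) {
        CostInfo cost_info;          // fresh per call, as in this commit
        cost_info.total_time = ms;
        return cost_info;
      }
    };

    int main() {
      RefStyleExecutor ref_exec;
      const CostInfo& first = ref_exec.DryRun(1.0);
      ref_exec.DryRun(2.0);
      assert(first.total_time == 2.0);  // silently changed under the caller

      ValueStyleExecutor val_exec;
      CostInfo kept = val_exec.DryRun(1.0);
      val_exec.DryRun(2.0);
      assert(kept.total_time == 1.0);   // unaffected by later calls
      return 0;
    }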
paddle/fluid/pybind/pybind.cc

@@ -2069,11 +2069,13 @@ All parameter, weight, gradient are variables in Paddle.
                  fetch_vars);
           });

-  py::class_<framework::CostInfo>(m, "CostInfo")
+  py::class_<framework::interpreter::CostInfo>(m, "CostInfo")
       .def(py::init<>())
-      .def("total_time", [](CostInfo &self) { return self.total_time; })
-      .def("device_memory_bytes",
-           [](CostInfo &self) { return self.device_memory_bytes; });
+      .def("total_time",
+           [](interpreter::CostInfo &self) { return self.total_time; })
+      .def("device_memory_bytes", [](interpreter::CostInfo &self) {
+        return self.device_memory_bytes;
+      });

   py::class_<framework::StandaloneExecutor>(m, "StandaloneExecutor")
       .def(py::init<const platform::Place &, const ProgramDesc &,

@@ -2134,7 +2136,7 @@ All parameter, weight, gradient are variables in Paddle.
               feed_tensors.push_back(t);
             }

-            CostInfo cost_info;
+            framework::interpreter::CostInfo cost_info;
             {
               pybind11::gil_scoped_release release;
               cost_info = self.DryRun(feed_names, feed_tensors);
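The binding keeps total_time and device_memory_bytes as zero-argument methods (lambdas returning the fields), so the Python-visible API is unchanged while the C++ type moves into the interpreter namespace. A self-contained pybind11 module with the same shape (module name invented here):

    #include <cstddef>
    #include <pybind11/pybind11.h>

    namespace py = pybind11;

    namespace interpreter {
    struct CostInfo {
      double total_time{0.};               // ms
      std::size_t device_memory_bytes{0};  // bytes
    };
    }  // namespace interpreter

    PYBIND11_MODULE(cost_info_demo, m) {
      py::class_<interpreter::CostInfo>(m, "CostInfo")
          .def(py::init<>())
          .def("total_time",
               [](interpreter::CostInfo &self) { return self.total_time; })
          .def("device_memory_bytes", [](interpreter::CostInfo &self) {
            return self.device_memory_bytes;
          });
    }
    // In Python: CostInfo().total_time() -> 0.0

Using def_readonly("total_time", &interpreter::CostInfo::total_time) would expose the field as an attribute instead; the lambda form preserves the method-call spelling already used by callers.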