Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c65c6ae5
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c65c6ae5
编写于
2月 18, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
2月 18, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add flag to control profile level in python API (#22319)
* add python flag to control profile level test=develop
上级
fce37bc5
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
188 addition
and
75 deletion
+188
-75
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+13
-4
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+81
-43
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+21
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-0
python/paddle/fluid/profiler.py
python/paddle/fluid/profiler.py
+38
-5
python/paddle/fluid/tests/unittests/test_profiler.py
python/paddle/fluid/tests/unittests/test_profiler.py
+28
-22
未找到文件。
paddle/fluid/framework/operator.cc
浏览文件 @
c65c6ae5
...
...
@@ -167,7 +167,13 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
}
{
platform
::
RecordEvent
record_event
(
Type
());
// TODO(wangchaochaohu) : refine code to use only one RecordEvent)
// in order to record different op type cost time
// and different op name cost time,we set two event.
platform
::
RecordEvent
op_type_record_event
(
Type
());
auto
op_name
=
platform
::
OpName
(
outputs_
,
Type
());
platform
::
RecordEvent
op_name_record_event
(
op_name
,
platform
::
RecordRole
::
kUniqueOp
);
RunImpl
(
scope
,
place
);
}
...
...
@@ -950,7 +956,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
Scope
*
transfer_scope
=
nullptr
;
{
platform
::
RecordEvent
record_event
(
"prepare_data_inner_op"
);
platform
::
RecordEvent
record_event
(
"prepare_data"
,
platform
::
RecordRole
::
kInnerOp
);
transfer_scope
=
PrepareData
(
scope
,
*
kernel_type_
,
&
transfered_inplace_vars
,
runtime_ctx
);
}
...
...
@@ -963,7 +970,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
if
(
!
all_kernels_must_compute_runtime_shape_
)
{
platform
::
RecordEvent
record_event
(
"infer_shape_inner_op"
);
platform
::
RecordEvent
record_event
(
"infer_shape"
,
platform
::
RecordRole
::
kInnerOp
);
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
*
runtime_ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
}
...
...
@@ -975,7 +983,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
// not Scope. Imperative mode only pass inputs and get outputs.
{
platform
::
RecordEvent
record_event
(
"compute_inner_op"
);
platform
::
RecordEvent
record_event
(
"compute"
,
platform
::
RecordRole
::
kInnerOp
);
(
*
kernel_func_
)(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
,
*
runtime_ctx
,
kernel_configs
));
}
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
c65c6ae5
...
...
@@ -42,6 +42,7 @@ static int64_t profiler_lister_id = 0;
static
bool
should_send_profile_state
=
false
;
std
::
mutex
profiler_mu
;
static
TracerOption
g_tracer_option
=
TracerOption
::
kDefault
;
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// The thread local event list only can be accessed by the specific thread
...
...
@@ -137,11 +138,15 @@ void PopEvent(const std::string &name) {
GetEventList
().
Record
(
EventType
::
kPopRange
,
name
,
g_thread_id
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
)
:
is_enabled_
(
false
),
start_ns_
(
PosixInNsec
())
{
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
RecordRole
role
)
:
is_enabled_
(
false
),
start_ns_
(
PosixInNsec
())
,
role_
(
role
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
||
name
.
empty
())
return
;
if
((
g_tracer_option
==
TracerOption
::
kOpDetail
&&
role_
!=
RecordRole
::
kInnerOp
)
||
(
g_tracer_option
==
TracerOption
::
kDefault
&&
role
!=
RecordRole
::
kOrdinary
))
return
;
// lock is not needed, the code below is thread-safe
is_enabled_
=
true
;
Event
*
e
=
PushEvent
(
name
);
// Maybe need the same push/pop behavior.
...
...
@@ -363,30 +368,21 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
}
if
(
print_depth
>=
100
)
return
;
int
print_depth_next
=
print_depth
;
if
(
events_table
.
size
()
<=
0
)
return
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
auto
event_item
=
events_table
[
i
][
j
];
std
::
vector
<
std
::
vector
<
EventItem
>>
child_table
;
std
::
vector
<
EventItem
>
table
;
bool
do_next
=
false
;
std
::
string
op_end_str
=
"inner_op"
;
for
(
auto
it
=
child_map
.
begin
();
it
!=
child_map
.
end
();
it
++
)
{
if
(
it
->
first
==
event_item
.
name
)
{
table
.
push_back
(
it
->
second
);
if
(
!
do_next
)
do_next
=
!
(
it
->
second
.
name
.
rfind
(
op_end_str
)
==
(
it
->
second
.
name
.
length
()
-
op_end_str
.
length
()));
}
}
child_table
.
push_back
(
table
);
if
(
do_next
)
print_depth_next
=
print_depth
+
1
;
else
print_depth_next
=
print_depth
+
100
;
auto
name_len
=
event_item
.
name
.
length
();
std
::
string
print_name
=
event_item
.
name
.
substr
(
remove_len
,
name_len
);
std
::
string
delimiter
;
...
...
@@ -394,10 +390,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
delimiter
=
" "
+
delimiter
;
}
print_name
=
delimiter
+
print_name
;
size_t
pos
=
0
;
while
((
pos
=
print_name
.
find
(
op_end_str
))
!=
std
::
string
::
npos
)
{
print_name
.
erase
(
pos
,
op_end_str
.
length
());
}
std
::
cout
<<
std
::
setw
(
name_width
)
<<
print_name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
;
...
...
@@ -416,14 +409,14 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ratio
<<
std
::
endl
;
PrintProfiler
(
child_table
,
child_map
,
sorted_domain
,
name_width
,
data_width
,
merge_thread
,
print_depth
_next
,
0
);
data_width
,
merge_thread
,
print_depth
+
1
,
0
);
}
}
}
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
SetSortedFunc
(
EventSortingKey
sorted_by
,
std
::
string
*
domain
)
{
auto
sorted_domain
=
*
domain
;
std
::
string
sorted_
domain
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
...
...
@@ -471,6 +464,7 @@ std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
default:
sorted_domain
=
"event first end time"
;
}
*
domain
=
sorted_domain
;
return
sorted_func
;
}
...
...
@@ -709,6 +703,45 @@ void ParseMemEvents(const std::vector<std::vector<MemEvent>> &events) {
PrintMemProfiler
(
annotation_report
,
55
,
18
);
}
void
DealWithShowName
()
{
std
::
unordered_map
<
std
::
string
,
int
>
prefix_name
;
std
::
vector
<
std
::
string
>
op_out_name
;
for
(
auto
it
=
g_all_event_lists
.
begin
();
it
!=
g_all_event_lists
.
end
();
++
it
)
{
for
(
auto
&
block
:
(
*
it
)
->
event_blocks
)
{
for
(
auto
&
r
:
block
)
{
auto
event_name
=
r
.
name
();
size_t
start
=
event_name
.
find
(
'%'
,
0
);
size_t
end
=
event_name
.
find
(
'%'
,
start
+
1
);
std
::
string
prefix_str
=
event_name
.
substr
(
0
,
start
);
while
(
start
!=
std
::
string
::
npos
&&
end
!=
std
::
string
::
npos
)
{
auto
search_str
=
event_name
.
substr
(
start
,
end
-
start
+
1
);
auto
it
=
find
(
op_out_name
.
begin
(),
op_out_name
.
end
(),
search_str
);
std
::
string
replace_str
;
bool
prefix_find
=
true
;
if
(
prefix_name
.
find
(
prefix_str
)
==
prefix_name
.
end
())
{
prefix_find
=
false
;
prefix_name
[
prefix_str
]
=
0
;
}
if
(
it
==
op_out_name
.
end
())
{
if
(
prefix_find
)
prefix_name
[
prefix_str
]
=
prefix_name
[
prefix_str
]
+
1
;
op_out_name
.
push_back
(
search_str
);
}
replace_str
=
std
::
to_string
(
prefix_name
[
prefix_str
]);
event_name
.
replace
(
start
,
end
-
start
+
1
,
replace_str
);
start
=
start
+
1
;
start
=
event_name
.
find
(
'%'
,
start
);
end
=
event_name
.
find
(
'%'
,
start
+
1
);
prefix_str
=
event_name
.
substr
(
0
,
start
);
}
r
.
set_name
(
event_name
);
}
}
}
}
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
)
{
SynchronizeAllDevice
();
...
...
@@ -718,37 +751,17 @@ void DisableProfiler(EventSortingKey sorted_key,
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
// Mark the profiling stop.
Mark
(
"_stop_profiler_"
);
DealWithShowName
();
DeviceTracer
*
tracer
=
GetDeviceTracer
();
if
(
tracer
->
IsEnabled
())
{
tracer
->
Disable
();
tracer
->
GenProfile
(
profile_path
);
tracer
->
GenEventKernelCudaElapsedTime
();
tracer
->
GenProfile
(
profile_path
);
}
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
std
::
vector
<
std
::
string
>
op_name
;
for
(
size_t
i
=
0
;
i
<
(
all_events
).
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
(
all_events
)[
i
].
size
();
j
++
)
{
std
::
string
event_name
=
(
all_events
)[
i
][
j
].
name
();
size_t
start
=
event_name
.
find
(
'%'
,
0
);
size_t
end
=
event_name
.
rfind
(
'%'
,
event_name
.
length
()
-
1
);
if
(
start
==
std
::
string
::
npos
||
end
==
std
::
string
::
npos
)
continue
;
std
::
string
search_str
=
event_name
.
substr
(
start
,
end
-
start
+
1
);
auto
it
=
find
(
op_name
.
begin
(),
op_name
.
end
(),
search_str
);
std
::
string
replace_str
;
if
(
it
!=
op_name
.
end
())
{
replace_str
=
std
::
to_string
(
std
::
distance
(
op_name
.
begin
(),
it
));
}
else
{
replace_str
=
std
::
to_string
(
op_name
.
size
());
op_name
.
push_back
(
search_str
);
}
event_name
.
replace
(
start
,
end
-
start
+
1
,
replace_str
);
(
all_events
)[
i
][
j
].
set_name
(
event_name
);
}
}
ParseEvents
(
all_events
,
true
,
sorted_key
);
ParseEvents
(
all_events
,
false
,
sorted_key
);
if
(
VLOG_IS_ON
(
5
))
{
...
...
@@ -773,5 +786,30 @@ void SetProfileListener() {
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
std
::
string
OpName
(
const
framework
::
VariableNameMap
&
name_map
,
const
std
::
string
&
type_name
)
{
if
(
platform
::
GetTracerOption
()
!=
platform
::
TracerOption
::
kAllOpDetail
)
return
""
;
std
::
string
ret
=
type_name
+
"%"
;
for
(
auto
it
=
name_map
.
begin
();
it
!=
name_map
.
end
();
it
++
)
{
auto
name_outputs
=
it
->
second
;
if
(
!
name_outputs
.
empty
()
&&
type_name
.
length
()
<
name_outputs
[
0
].
length
())
{
ret
=
ret
+
name_outputs
[
0
];
break
;
}
}
ret
=
ret
+
"%"
;
return
ret
;
}
void
SetTracerOption
(
TracerOption
option
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
g_tracer_option
=
option
;
}
platform
::
TracerOption
GetTracerOption
()
{
return
g_tracer_option
;
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler.h
浏览文件 @
c65c6ae5
...
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -39,6 +40,19 @@ enum class ProfilerState {
kAll
,
// Profile both CPU and GPU. (Currently experimental).
};
enum
class
RecordRole
{
kOrdinary
,
// only record op time with op type key
kInnerOp
,
// record op detail time with op type key
kUniqueOp
,
// record op detail time with op unique name key
};
// it is the flag to control to print the profiling result
enum
class
TracerOption
{
kDefault
,
// print the different op type profiling result
kOpDetail
,
// print the detail profiling result of different op type
kAllOpDetail
,
// print the detail profiling result of different op name
};
void
Mark
(
const
std
::
string
&
name
);
void
PushMemEvent
(
uint64_t
start_ns
,
uint64_t
end_ns
,
size_t
bytes
,
...
...
@@ -79,7 +93,8 @@ Event* PushEvent(const std::string& name);
void
PopEvent
(
const
std
::
string
&
name
);
struct
RecordEvent
{
explicit
RecordEvent
(
const
std
::
string
&
name
);
RecordEvent
(
const
std
::
string
&
name
,
const
RecordRole
r_type
=
RecordRole
::
kOrdinary
);
~
RecordEvent
();
...
...
@@ -90,6 +105,7 @@ struct RecordEvent {
// Need to distinguish name by op type, block_id, program_id and perhaps
// different kernel invocations within an op.
std
::
string
full_name_
;
RecordRole
role_
{
RecordRole
::
kOrdinary
};
};
class
RecordRPCEvent
{
...
...
@@ -181,6 +197,10 @@ bool ShouldSendProfileState();
void
SetProfileListener
();
int64_t
ListenerId
();
std
::
string
OpName
(
const
framework
::
VariableNameMap
&
name_map
,
const
std
::
string
&
type_name
);
void
SetTracerOption
(
TracerOption
option
);
platform
::
TracerOption
GetTracerOption
();
#ifdef PADDLE_WITH_CUDA
void
DummyKernelAndEvent
();
#endif
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
c65c6ae5
...
...
@@ -1555,6 +1555,12 @@ All parameter, weight, gradient are variables in Paddle.
#endif
#endif
py
::
enum_
<
platform
::
TracerOption
>
(
m
,
"TracerOption"
,
py
::
arithmetic
())
.
value
(
"kDefault"
,
platform
::
TracerOption
::
kDefault
)
.
value
(
"kOpDetail"
,
platform
::
TracerOption
::
kOpDetail
)
.
value
(
"kAllOpDetail"
,
platform
::
TracerOption
::
kAllOpDetail
)
.
export_values
();
py
::
enum_
<
platform
::
ProfilerState
>
(
m
,
"ProfilerState"
,
py
::
arithmetic
())
.
value
(
"kDisabled"
,
platform
::
ProfilerState
::
kDisabled
)
.
value
(
"kCPU"
,
platform
::
ProfilerState
::
kCPU
)
...
...
@@ -1571,6 +1577,7 @@ All parameter, weight, gradient are variables in Paddle.
.
value
(
"kAve"
,
platform
::
EventSortingKey
::
kAve
)
.
export_values
();
m
.
def
(
"set_tracer_option"
,
platform
::
SetTracerOption
);
m
.
def
(
"enable_profiler"
,
platform
::
EnableProfiler
);
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
...
...
python/paddle/fluid/profiler.py
浏览文件 @
c65c6ae5
...
...
@@ -126,7 +126,7 @@ def reset_profiler():
core
.
reset_profiler
()
def
start_profiler
(
state
):
def
start_profiler
(
state
,
tracer_option
=
'Default'
):
"""
Enable the profiler. Uers can use `fluid.profiler.start_profiler` and
`fluid.profiler.stop_profiler` to profile, which is equal to the usage
...
...
@@ -137,9 +137,15 @@ def start_profiler(state):
or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
both CPU and GPU; 'All' means profiling both CPU and GPU, and
generates timeline as well.
tracer_option (str) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
can control the profile level and print the different level profile result. Default option print
the different Op type profiling result and the OpDetail option print the detail profiling
result of different op types such as compute and data transform, AllOpDetail option
print the detail profiling result of different op name same as OpDetail.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
ValueError: If `tracer_option` is not in ['Default', 'OpDetail', 'AllOpDetail']
Examples:
...
...
@@ -154,6 +160,13 @@ def start_profiler(state):
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
profiler.start_profiler('GPU', "OpDetail")
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
"""
if
core
.
is_profiler_enabled
():
return
...
...
@@ -165,6 +178,18 @@ def start_profiler(state):
prof_state
=
core
.
ProfilerState
.
kCPU
else
:
prof_state
=
core
.
ProfilerState
.
kAll
if
tracer_option
not
in
[
'Default'
,
'OpDetail'
,
'AllOpDetail'
]:
raise
ValueError
(
"tracer option must be 'Default', 'OpDetail', 'AllOpDetail'."
)
if
tracer_option
==
"Default"
:
prof_tracer_option
=
core
.
TracerOption
.
kDefault
elif
tracer_option
==
"OpDetail"
:
prof_tracer_option
=
core
.
TracerOption
.
kOpDetail
else
:
prof_tracer_option
=
core
.
TracerOption
.
kAllOpDetail
core
.
set_tracer_option
(
prof_tracer_option
)
core
.
enable_profiler
(
prof_state
)
...
...
@@ -184,8 +209,8 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
The `max` means sorting by the maximum execution time.
The `min` means sorting by the minimum execution time.
The `ave` means sorting by the average execution time.
profile_path (str, optional) : If state == 'All', it will generate timeline,
and write it into `profile_path`. The default profile_path is `/tmp/profile`.
profile_path (str, optional) : If state == 'All', it will generate timeline,
Raises:
ValueError: If `sorted_key` is not in
...
...
@@ -225,7 +250,10 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
@
signature_safe_contextmanager
def
profiler
(
state
,
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
def
profiler
(
state
,
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
,
tracer_option
=
'Default'
):
"""
The profiler interface. Different from `fluid.profiler.cuda_profiler`,
this profiler can be used to profile both CPU and GPU program.
...
...
@@ -246,6 +274,11 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
The `ave` means sorting by the average execution time.
profile_path (str, optional) : If state == 'All', it will generate timeline,
and write it into `profile_path`. The default profile_path is `/tmp/profile`.
tracer_option (str) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
can control the profile level and print the different level profile result. Default option print
the different Op type profiling result and the OpDetail option print the detail profiling
result of different op types such as compute and data transform, AllOpDetail option
print the detail profiling result of different op name same as OpDetail.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
...
...
@@ -268,7 +301,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
with profiler.profiler('CPU', 'total', '/tmp/profile') as prof:
with profiler.profiler('CPU', 'total', '/tmp/profile'
, 'Default'
) as prof:
for i in range(epoc):
input = np.random.random(dshape).astype('float32')
exe.run(fluid.default_main_program(), feed={'data': input})
...
...
@@ -311,6 +344,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
thread0::conv2d 8 7.93456 0.291385 5.63342 0.99182 0.795243
thread0::elementwise_add 8 1.96555 0.191884 0.518004 0.245693 0.196998
"""
start_profiler
(
state
)
start_profiler
(
state
,
tracer_option
)
yield
stop_profiler
(
sorted_key
,
profile_path
)
python/paddle/fluid/tests/unittests/test_profiler.py
浏览文件 @
c65c6ae5
...
...
@@ -22,6 +22,7 @@ import paddle.fluid as fluid
import
paddle.fluid.profiler
as
profiler
import
paddle.fluid.layers
as
layers
import
paddle.fluid.core
as
core
from
paddle.fluid
import
compiler
,
Program
,
program_guard
import
paddle.fluid.proto.profiler.profiler_pb2
as
profiler_pb2
...
...
@@ -30,7 +31,11 @@ class TestProfiler(unittest.TestCase):
def
setUpClass
(
cls
):
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
def
net_profiler
(
self
,
state
,
use_parallel_executor
=
False
):
def
net_profiler
(
self
,
state
,
option
,
iter_range
=
None
,
use_parallel_executor
=
False
):
profile_path
=
os
.
path
.
join
(
tempfile
.
gettempdir
(),
"profile"
)
open
(
profile_path
,
"w"
).
write
(
""
)
startup_program
=
fluid
.
Program
()
...
...
@@ -75,7 +80,7 @@ class TestProfiler(unittest.TestCase):
main_program
=
main_program
)
pass_acc_calculator
=
fluid
.
average
.
WeightedAverage
()
with
profiler
.
profiler
(
state
,
'total'
,
profile_path
)
as
prof
:
with
profiler
.
profiler
(
state
,
'total'
,
profile_path
,
option
)
as
prof
:
for
iter
in
range
(
10
):
if
iter
==
2
:
profiler
.
reset_profiler
()
...
...
@@ -94,36 +99,37 @@ class TestProfiler(unittest.TestCase):
pass_acc_calculator
.
add
(
value
=
acc
,
weight
=
b_size
)
pass_acc
=
pass_acc_calculator
.
eval
()
data
=
open
(
profile_path
,
'rb'
).
read
()
self
.
assertGreater
(
len
(
data
),
0
)
profile_pb
=
profiler_pb2
.
Profile
()
profile_pb
.
ParseFromString
(
data
)
self
.
assertGreater
(
len
(
profile_pb
.
events
),
0
)
for
event
in
profile_pb
.
events
:
if
event
.
type
==
profiler_pb2
.
Event
.
GPUKernel
:
if
not
event
.
detail_info
and
not
event
.
name
.
startswith
(
"MEM"
):
raise
Exception
(
"Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
%
event
.
name
)
elif
event
.
type
==
profiler_pb2
.
Event
.
CPU
and
(
event
.
name
.
startswith
(
"Driver API"
)
or
event
.
name
.
startswith
(
"Runtime API"
)):
print
(
"Warning: unregister"
,
event
.
name
)
if
(
len
(
data
)
>
0
):
profile_pb
=
profiler_pb2
.
Profile
()
profile_pb
.
ParseFromString
(
data
)
self
.
assertGreater
(
len
(
profile_pb
.
events
),
0
)
for
event
in
profile_pb
.
events
:
if
event
.
type
==
profiler_pb2
.
Event
.
GPUKernel
:
if
not
event
.
detail_info
and
not
event
.
name
.
startswith
(
"MEM"
):
raise
Exception
(
"Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
%
event
.
name
)
elif
event
.
type
==
profiler_pb2
.
Event
.
CPU
and
(
event
.
name
.
startswith
(
"Driver API"
)
or
event
.
name
.
startswith
(
"Runtime API"
)):
print
(
"Warning: unregister"
,
event
.
name
)
def
test_cpu_profiler
(
self
):
self
.
net_profiler
(
'CPU'
)
self
.
net_profiler
(
'CPU'
,
use_parallel_executor
=
True
)
self
.
net_profiler
(
'CPU'
,
"Default"
)
self
.
net_profiler
(
'CPU'
,
"Default"
,
use_parallel_executor
=
True
)
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"profiler is enabled only with GPU"
)
def
test_cuda_profiler
(
self
):
self
.
net_profiler
(
'GPU'
)
self
.
net_profiler
(
'GPU'
,
use_parallel_executor
=
True
)
self
.
net_profiler
(
'GPU'
,
"OpDetail"
)
self
.
net_profiler
(
'GPU'
,
"OpDetail"
,
use_parallel_executor
=
True
)
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"profiler is enabled only with GPU"
)
def
test_all_profiler
(
self
):
self
.
net_profiler
(
'All'
)
self
.
net_profiler
(
'All'
,
use_parallel_executor
=
True
)
self
.
net_profiler
(
'All'
,
"AllOpDetail"
)
self
.
net_profiler
(
'All'
,
"AllOpDetail"
,
use_parallel_executor
=
True
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录