Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
72ff5a09
P
Paddle
项目概览
PaddlePaddle
/
Paddle
接近 2 年 前同步成功
通知
2323
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
72ff5a09
编写于
3月 03, 2020
作者:
Z
Zhang Ting
提交者:
GitHub
3月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix print bug of profile, test=develop (#22804)
上级
4e8bc024
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
63 addition
and
32 deletion
+63
-32
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+4
-4
paddle/fluid/platform/event.h
paddle/fluid/platform/event.h
+11
-1
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+7
-6
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+4
-9
paddle/fluid/platform/profiler_helper.h
paddle/fluid/platform/profiler_helper.h
+34
-10
paddle/fluid/platform/profiler_test.cc
paddle/fluid/platform/profiler_test.cc
+3
-2
未找到文件。
paddle/fluid/framework/operator.cc
浏览文件 @
72ff5a09
...
...
@@ -173,7 +173,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform
::
RecordEvent
op_type_record_event
(
Type
());
auto
op_name
=
platform
::
OpName
(
outputs_
,
Type
());
platform
::
RecordEvent
op_name_record_event
(
op_name
,
platform
::
Record
Role
::
kUniqueOp
);
op_name
,
platform
::
Event
Role
::
kUniqueOp
);
RunImpl
(
scope
,
place
);
}
...
...
@@ -957,7 +957,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
Scope
*
transfer_scope
=
nullptr
;
{
platform
::
RecordEvent
record_event
(
"prepare_data"
,
platform
::
Record
Role
::
kInnerOp
);
platform
::
Event
Role
::
kInnerOp
);
transfer_scope
=
PrepareData
(
scope
,
*
kernel_type_
,
&
transfered_inplace_vars
,
runtime_ctx
);
}
...
...
@@ -971,7 +971,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
if
(
!
all_kernels_must_compute_runtime_shape_
)
{
platform
::
RecordEvent
record_event
(
"infer_shape"
,
platform
::
Record
Role
::
kInnerOp
);
platform
::
Event
Role
::
kInnerOp
);
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
*
runtime_ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
}
...
...
@@ -984,7 +984,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// not Scope. Imperative mode only pass inputs and get outputs.
{
platform
::
RecordEvent
record_event
(
"compute"
,
platform
::
Record
Role
::
kInnerOp
);
platform
::
Event
Role
::
kInnerOp
);
(
*
kernel_func_
)(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
,
*
runtime_ctx
,
kernel_configs
));
}
...
...
paddle/fluid/platform/event.h
浏览文件 @
72ff5a09
...
...
@@ -25,18 +25,27 @@ namespace platform {
enum
class
EventType
{
kMark
,
kPushRange
,
kPopRange
};
enum
class
EventRole
{
kOrdinary
,
// only record op time with op type key
kInnerOp
,
// record op detail time with op type key
kUniqueOp
,
// record op detail time with op unique name key
};
class
Event
{
public:
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
Event
(
EventType
type
,
std
::
string
name
,
uint32_t
thread_id
);
Event
(
EventType
type
,
std
::
string
name
,
uint32_t
thread_id
,
EventRole
role
=
EventRole
::
kOrdinary
);
const
EventType
&
type
()
const
;
Event
*
parent
()
const
{
return
parent_
;
}
void
set_parent
(
Event
*
parent
)
{
parent_
=
parent
;
}
std
::
string
name
()
const
{
return
name_
;
}
EventRole
role
()
const
{
return
role_
;
}
uint32_t
thread_id
()
const
{
return
thread_id_
;
}
void
set_name
(
std
::
string
name
)
{
name_
=
name
;
}
void
set_role
(
EventRole
role
)
{
role_
=
role
;
}
#ifdef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUPTI
...
...
@@ -53,6 +62,7 @@ class Event {
std
::
string
name_
{};
Event
*
parent_
{
nullptr
};
uint32_t
thread_id_
;
EventRole
role_
{};
int64_t
cpu_ns_
;
bool
visited_status_
{
false
};
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
72ff5a09
...
...
@@ -42,8 +42,9 @@ namespace platform {
MemEvenRecorder
MemEvenRecorder
::
recorder
;
Event
::
Event
(
EventType
type
,
std
::
string
name
,
uint32_t
thread_id
)
:
type_
(
type
),
name_
(
name
),
thread_id_
(
thread_id
)
{
Event
::
Event
(
EventType
type
,
std
::
string
name
,
uint32_t
thread_id
,
EventRole
role
)
:
type_
(
type
),
name_
(
name
),
thread_id_
(
thread_id
),
role_
(
role
)
{
cpu_ns_
=
GetTimeInNsec
();
}
...
...
@@ -62,12 +63,12 @@ double Event::CudaElapsedMs(const Event &e) const {
#endif
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
Record
Role
role
)
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
Event
Role
role
)
:
is_enabled_
(
false
),
start_ns_
(
PosixInNsec
()),
role_
(
role
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
||
name
.
empty
())
return
;
// lock is not needed, the code below is thread-safe
is_enabled_
=
true
;
Event
*
e
=
PushEvent
(
name
);
Event
*
e
=
PushEvent
(
name
,
role
);
// Maybe need the same push/pop behavior.
SetCurAnnotation
(
e
);
name_
=
e
->
name
();
...
...
@@ -179,8 +180,8 @@ void Mark(const std::string &name) {
GetEventList
().
Record
(
EventType
::
kMark
,
name
,
g_thread_id
);
}
Event
*
PushEvent
(
const
std
::
string
&
name
)
{
return
GetEventList
().
Record
(
EventType
::
kPushRange
,
name
,
g_thread_id
);
Event
*
PushEvent
(
const
std
::
string
&
name
,
const
EventRole
role
)
{
return
GetEventList
().
Record
(
EventType
::
kPushRange
,
name
,
g_thread_id
,
role
);
}
void
PopEvent
(
const
std
::
string
&
name
)
{
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
72ff5a09
...
...
@@ -43,12 +43,6 @@ enum class ProfilerState {
kAll
,
// Profile both CPU and GPU. (Currently experimental).
};
enum
class
RecordRole
{
kOrdinary
,
// only record op time with op type key
kInnerOp
,
// record op detail time with op type key
kUniqueOp
,
// record op detail time with op unique name key
};
// it is the flag to control to print the profiling result
enum
class
TracerOption
{
kDefault
,
// print the different op type profiling result
...
...
@@ -86,6 +80,7 @@ struct EventItem {
double
cpu_time
;
double
gpu_time
;
float
ratio
;
EventRole
role
;
};
struct
OverHead
{
...
...
@@ -128,7 +123,7 @@ struct MemEvenRecorder {
struct
RecordEvent
{
RecordEvent
(
const
std
::
string
&
name
,
const
RecordRole
role
=
Record
Role
::
kOrdinary
);
const
EventRole
role
=
Event
Role
::
kOrdinary
);
~
RecordEvent
();
...
...
@@ -139,7 +134,7 @@ struct RecordEvent {
// Need to distinguish name by op type, block_id, program_id and perhaps
// different kernel invocations within an op.
std
::
string
full_name_
;
RecordRole
role_
{
Record
Role
::
kOrdinary
};
EventRole
role_
{
Event
Role
::
kOrdinary
};
};
class
RecordRPCEvent
{
...
...
@@ -201,7 +196,7 @@ void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const
Place
&
place
,
const
std
::
string
&
annotation
);
void
PopMemEvent
(
uint64_t
start_ns
,
uint64_t
end_ns
,
size_t
bytes
,
const
Place
&
place
,
const
std
::
string
&
annotation
);
Event
*
PushEvent
(
const
std
::
string
&
name
);
Event
*
PushEvent
(
const
std
::
string
&
name
,
const
EventRole
role
);
void
PopEvent
(
const
std
::
string
&
name
);
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
...
...
paddle/fluid/platform/profiler_helper.h
浏览文件 @
72ff5a09
...
...
@@ -304,9 +304,9 @@ void SetEvent(bool merge_thread, Event analyze_event, size_t *max_name_width,
if
(
event_idx
->
find
(
event_name
)
==
event_idx
->
end
())
{
event_idx
->
insert
({
event_name
,
event_items
->
size
()});
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event
_time
,
cpu_time
,
gpu_time
,
0.
};
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event_time
,
cpu_time
,
gpu
_time
,
0.
,
rit
->
role
()
};
event_items
->
push_back
(
event_item
);
}
else
{
int
index
=
event_idx
->
at
(
event_name
);
...
...
@@ -335,8 +335,10 @@ void SetEvent(bool merge_thread, Event analyze_event, size_t *max_name_width,
void
ComputeOverhead
(
const
std
::
multimap
<
std
::
string
,
EventItem
>
&
sub_child_map
,
OverHead
*
overhead
)
{
EventItem
memcpy_async
=
{
"GpuMemcpyAsync"
,
0
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.0
f
};
EventItem
memcpy_sync
=
{
"GpuMemcpySync"
,
0
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.0
f
};
EventItem
memcpy_async
=
{
"GpuMemcpyAsync"
,
0
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.0
f
,
EventRole
::
kOrdinary
};
EventItem
memcpy_sync
=
{
"GpuMemcpySync"
,
0
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.
,
0.0
f
,
EventRole
::
kOrdinary
};
for
(
auto
it
=
sub_child_map
.
begin
();
it
!=
sub_child_map
.
end
();
it
++
)
{
if
(
it
->
second
.
name
.
find
(
"compute"
)
!=
std
::
string
::
npos
)
{
overhead
->
compute_ratio
+=
it
->
second
.
ratio
;
...
...
@@ -359,6 +361,29 @@ void ComputeOverhead(const std::multimap<std::string, EventItem> &sub_child_map,
overhead
->
sub_memcpy_items
=
{
memcpy_async
,
memcpy_sync
};
}
std
::
string
FindOrdinaryParent
(
const
std
::
multimap
<
std
::
string
,
EventItem
>
&
sub_child_map
,
std
::
string
name
)
{
bool
find_name
=
false
;
std
::
string
parent
=
name
;
EventRole
role
;
for
(
auto
it
=
sub_child_map
.
begin
();
it
!=
sub_child_map
.
end
();
it
++
)
{
if
(
it
->
second
.
name
==
name
)
{
role
=
it
->
second
.
role
;
parent
=
it
->
first
;
find_name
=
true
;
break
;
}
}
if
(
find_name
&&
role
==
EventRole
::
kOrdinary
)
{
return
name
;
}
else
if
(
find_name
&&
role
!=
EventRole
::
kOrdinary
)
{
return
FindOrdinaryParent
(
sub_child_map
,
parent
);
}
else
{
return
parent
;
}
}
// When TracerOption is KDefault, OpDetail will be recorded but only default
// profile result will be printed.
// GpuMemcpy should be printed in kDefault setting, however it offten occurs
...
...
@@ -376,11 +401,7 @@ void GetChildMap(const std::multimap<std::string, EventItem> &sub_child_map,
}
else
{
for
(
auto
it
=
sub_child_map
.
begin
();
it
!=
sub_child_map
.
end
();
it
++
)
{
if
(
it
->
second
.
name
.
find
(
"GpuMemcpy"
)
!=
std
::
string
::
npos
)
{
std
::
string
parent_name
=
it
->
first
;
auto
left_pos
=
it
->
first
.
find
(
"/"
);
if
(
left_pos
!=
std
::
string
::
npos
)
{
parent_name
=
it
->
first
.
substr
(
0
,
left_pos
);
}
std
::
string
parent_name
=
FindOrdinaryParent
(
sub_child_map
,
it
->
first
);
auto
item
=
it
->
second
;
auto
right_pos
=
item
.
name
.
rfind
(
"/"
);
if
(
right_pos
!=
std
::
string
::
npos
)
{
...
...
@@ -389,6 +410,9 @@ void GetChildMap(const std::multimap<std::string, EventItem> &sub_child_map,
item
.
name
=
parent_name
+
"/"
+
child_name
;
}
child_map
->
insert
(
std
::
pair
<
std
::
string
,
EventItem
>
(
parent_name
,
item
));
}
else
if
(
it
->
second
.
role
==
EventRole
::
kOrdinary
)
{
child_map
->
insert
(
std
::
pair
<
std
::
string
,
EventItem
>
(
it
->
first
,
it
->
second
));
}
}
}
...
...
paddle/fluid/platform/profiler_test.cc
浏览文件 @
72ff5a09
...
...
@@ -40,6 +40,7 @@ TEST(RecordEvent, RecordEvent) {
using
paddle
::
platform
::
PopEvent
;
using
paddle
::
platform
::
ProfilerState
;
using
paddle
::
platform
::
EventSortingKey
;
using
paddle
::
platform
::
EventRole
;
ProfilerState
state
=
ProfilerState
::
kCPU
;
EnableProfiler
(
state
);
...
...
@@ -55,7 +56,7 @@ TEST(RecordEvent, RecordEvent) {
for
(
int
loop
=
0
;
loop
<
3
;
++
loop
)
{
for
(
int
i
=
1
;
i
<
5
;
++
i
)
{
std
::
string
name
=
"op_"
+
std
::
to_string
(
i
);
PushEvent
(
name
);
PushEvent
(
name
,
EventRole
::
kOrdinary
);
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
PopEvent
(
name
);
...
...
@@ -107,7 +108,7 @@ TEST(RecordEvent, RecordEvent) {
}
// Bad Usage:
PushEvent
(
"event_without_pop"
);
PushEvent
(
"event_without_pop"
,
EventRole
::
kOrdinary
);
PopEvent
(
"event_without_push"
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
GetAllEvents
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录