Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
dbfe5333
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
dbfe5333
编写于
5月 25, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
5月 25, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add pe profiler Event (#24611)
上级
55b664a1
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
98 addition
and
18 deletion
+98
-18
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+3
-0
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+40
-3
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+1
-1
paddle/fluid/platform/event.h
paddle/fluid/platform/event.h
+1
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+4
-5
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+1
-1
paddle/fluid/platform/profiler_helper.h
paddle/fluid/platform/profiler_helper.h
+46
-6
paddle/fluid/platform/profiler_test.cc
paddle/fluid/platform/profiler_test.cc
+2
-2
未找到文件。
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
dbfe5333
...
...
@@ -31,6 +31,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_double
(
eager_delete_tensor_gb
);
...
...
@@ -820,6 +821,8 @@ void ParallelExecutor::BCastParamsToDevices(
FetchResultType
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
bool
return_merged
)
{
VLOG
(
3
)
<<
"enter ParallelExecutor Run"
;
platform
::
RecordEvent
parallel_executor_event
(
"ParallelExecutor::Run"
,
paddle
::
platform
::
EventRole
::
kSpecial
);
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
...
...
paddle/fluid/platform/device_tracer.cc
浏览文件 @
dbfe5333
...
...
@@ -40,6 +40,9 @@ namespace {
thread_local
std
::
deque
<
int
>
block_id_stack
;
// Tracking the nested event stacks.
thread_local
std
::
deque
<
Event
*>
annotation_stack
;
// stack to store events such as pe and so on
static
std
::
deque
<
Event
*>
main_thread_annotation_stack
{};
static
std
::
deque
<
std
::
string
>
main_thread_annotation_stack_name
{};
std
::
map
<
uint32_t
,
int32_t
>
system_thread_id_map
;
...
...
@@ -638,15 +641,49 @@ DeviceTracer *GetDeviceTracer() {
return
tracer
;
}
void
SetCurAnnotation
(
Event
*
event
)
{
if
(
!
annotation_stack
.
empty
())
{
std
::
string
SetCurAnnotation
(
Event
*
event
)
{
std
::
string
ret
;
if
(
!
annotation_stack
.
empty
()
&&
event
->
role
()
!=
EventRole
::
kSpecial
)
{
event
->
set_parent
(
annotation_stack
.
back
());
event
->
set_name
(
annotation_stack
.
back
()
->
name
()
+
"/"
+
event
->
name
());
}
annotation_stack
.
push_back
(
event
);
if
(
!
main_thread_annotation_stack_name
.
empty
()
&&
!
annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
thread_id
()
!=
annotation_stack
.
back
()
->
thread_id
())
{
ret
=
main_thread_annotation_stack_name
.
back
()
+
"/"
+
event
->
name
();
}
else
{
ret
=
event
->
name
();
}
if
(
event
->
role
()
==
EventRole
::
kSpecial
)
{
std
::
string
name
=
event
->
name
();
if
(
!
main_thread_annotation_stack_name
.
empty
())
{
name
=
main_thread_annotation_stack_name
.
back
()
+
"/"
+
event
->
name
();
}
main_thread_annotation_stack_name
.
push_back
(
name
);
main_thread_annotation_stack
.
push_back
(
event
);
}
return
ret
;
}
void
ClearCurAnnotation
()
{
annotation_stack
.
pop_back
();
}
void
ClearCurAnnotation
()
{
if
(
!
main_thread_annotation_stack_name
.
empty
()
&&
!
annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
thread_id
()
!=
annotation_stack
.
back
()
->
thread_id
())
{
annotation_stack
.
back
()
->
set_name
(
main_thread_annotation_stack_name
.
back
()
+
"/"
+
annotation_stack
.
back
()
->
name
());
}
if
(
!
main_thread_annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
name
()
==
annotation_stack
.
back
()
->
name
())
{
main_thread_annotation_stack_name
.
pop_back
();
main_thread_annotation_stack
.
pop_back
();
}
annotation_stack
.
pop_back
();
}
Event
*
CurAnnotation
()
{
if
(
annotation_stack
.
empty
())
return
nullptr
;
...
...
paddle/fluid/platform/device_tracer.h
浏览文件 @
dbfe5333
...
...
@@ -137,7 +137,7 @@ class DeviceTracer {
DeviceTracer
*
GetDeviceTracer
();
// Set a name for the cuda kernel operation being launched by the thread.
void
SetCurAnnotation
(
Event
*
event
);
std
::
string
SetCurAnnotation
(
Event
*
event
);
// Clear the name after the operation is done.
void
ClearCurAnnotation
();
// Current name of the operation being run in the thread.
...
...
paddle/fluid/platform/event.h
浏览文件 @
dbfe5333
...
...
@@ -29,6 +29,7 @@ enum class EventRole {
kOrdinary
,
// only record op time with op type key
kInnerOp
,
// record op detail time with op type key
kUniqueOp
,
// record op detail time with op unique name key
kSpecial
,
// record event such as PE which is outside of thread local
};
class
Event
{
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
dbfe5333
...
...
@@ -73,8 +73,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
// lock is not needed, the code below is thread-safe
Event
*
e
=
PushEvent
(
name
,
role
);
// Maybe need the same push/pop behavior.
SetCurAnnotation
(
e
);
name_
=
e
->
name
();
name_
=
SetCurAnnotation
(
e
);
}
RecordEvent
::~
RecordEvent
()
{
...
...
@@ -86,7 +85,7 @@ RecordEvent::~RecordEvent() {
BlockDepth
(),
g_thread_id
);
}
ClearCurAnnotation
();
PopEvent
(
name_
);
PopEvent
(
name_
,
role_
);
}
void
MemEvenRecorder
::
PushMemRecord
(
const
void
*
ptr
,
const
Place
&
place
,
...
...
@@ -187,8 +186,8 @@ Event *PushEvent(const std::string &name, const EventRole role) {
return
GetEventList
().
Record
(
EventType
::
kPushRange
,
name
,
g_thread_id
,
role
);
}
void
PopEvent
(
const
std
::
string
&
name
)
{
GetEventList
().
Record
(
EventType
::
kPopRange
,
name
,
g_thread_id
);
void
PopEvent
(
const
std
::
string
&
name
,
const
EventRole
role
)
{
GetEventList
().
Record
(
EventType
::
kPopRange
,
name
,
g_thread_id
,
role
);
}
void
EnableProfiler
(
ProfilerState
state
)
{
PADDLE_ENFORCE_NE
(
state
,
ProfilerState
::
kDisabled
,
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
dbfe5333
...
...
@@ -197,7 +197,7 @@ void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
void
PopMemEvent
(
uint64_t
start_ns
,
uint64_t
end_ns
,
size_t
bytes
,
const
Place
&
place
,
const
std
::
string
&
annotation
);
Event
*
PushEvent
(
const
std
::
string
&
name
,
const
EventRole
role
);
void
PopEvent
(
const
std
::
string
&
name
);
void
PopEvent
(
const
std
::
string
&
name
,
const
EventRole
role
);
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
...
...
paddle/fluid/platform/profiler_helper.h
浏览文件 @
dbfe5333
...
...
@@ -22,12 +22,12 @@ limitations under the License. */
#include <memory>
#include <mutex> // NOLINT
#include <random>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif // PADDLE_WITH_CUDA
...
...
@@ -283,7 +283,8 @@ std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
void
SetEvent
(
bool
merge_thread
,
const
Event
&
analyze_event
,
size_t
*
max_name_width
,
std
::
list
<
Event
>
*
pushed_events
,
std
::
vector
<
EventItem
>
*
event_items
,
std
::
unordered_map
<
std
::
string
,
int
>
*
event_idx
)
{
std
::
unordered_map
<
std
::
string
,
int
>
*
event_idx
,
const
std
::
set
<
std
::
string
>
&
main_thread_event_name
)
{
if
(
analyze_event
.
type
()
==
EventType
::
kPushRange
)
{
pushed_events
->
push_back
(
analyze_event
);
}
else
if
(
analyze_event
.
type
()
==
EventType
::
kPopRange
)
{
...
...
@@ -313,8 +314,35 @@ void SetEvent(bool merge_thread, const Event &analyze_event,
if
(
merge_thread
)
{
event_name
=
rit
->
name
();
}
else
{
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
if
(
!
main_thread_event_name
.
empty
())
{
auto
origin_name
=
rit
->
name
();
int
index
=
1
;
int
split_pos
=
0
;
while
((
split_pos
=
FindNthReversePos
(
origin_name
,
'/'
,
index
))
!=
-
1
)
{
auto
prefix_str
=
origin_name
.
substr
(
0
,
split_pos
);
if
(
main_thread_event_name
.
count
(
prefix_str
))
{
break
;
}
index
++
;
}
if
(
split_pos
==
-
1
&&
!
main_thread_event_name
.
count
(
rit
->
name
()))
{
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
}
else
{
if
(
!
main_thread_event_name
.
count
(
rit
->
name
()))
{
event_name
=
origin_name
.
substr
(
0
,
split_pos
+
1
)
+
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
origin_name
.
substr
(
split_pos
+
1
,
origin_name
.
length
()
-
1
);
}
else
{
event_name
=
rit
->
name
();
}
}
}
else
{
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
}
}
auto
print_name_size
=
event_name
.
size
();
int
found_pos
=
0
;
...
...
@@ -608,6 +636,16 @@ void AnalyzeEvent(
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
,
EventSortingKey
sorted_by
,
size_t
*
max_name_width
,
OverHead
*
overhead
,
bool
merge_thread
)
{
// In order to deal with special events in the main thread
std
::
set
<
std
::
string
>
main_thread_event_name
;
for
(
size_t
i
=
0
;
i
<
(
*
analyze_events
).
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
(
*
analyze_events
)[
i
].
size
();
j
++
)
{
Event
event
=
(
*
analyze_events
)[
i
][
j
];
if
(
event
.
role
()
==
EventRole
::
kSpecial
)
{
main_thread_event_name
.
insert
(
event
.
name
());
}
}
}
for
(
size_t
i
=
0
;
i
<
(
*
analyze_events
).
size
();
i
++
)
{
double
total
=
0.
;
// the total time in one thread
std
::
list
<
Event
>
pushed_events
;
...
...
@@ -618,8 +656,10 @@ void AnalyzeEvent(
for
(
size_t
j
=
0
;
j
<
(
*
analyze_events
)[
i
].
size
();
j
++
)
{
Event
analyze_event
=
(
*
analyze_events
)[
i
][
j
];
SetEvent
(
merge_thread
,
analyze_event
,
max_name_width
,
&
pushed_events
,
&
event_items
,
&
event_idx
);
if
(
!
(
analyze_event
.
role
()
==
EventRole
::
kSpecial
&&
!
merge_thread
))
{
SetEvent
(
merge_thread
,
analyze_event
,
max_name_width
,
&
pushed_events
,
&
event_items
,
&
event_idx
,
main_thread_event_name
);
}
}
auto
table_size
=
event_items
.
size
();
...
...
paddle/fluid/platform/profiler_test.cc
浏览文件 @
dbfe5333
...
...
@@ -59,7 +59,7 @@ TEST(RecordEvent, RecordEvent) {
PushEvent
(
name
,
EventRole
::
kOrdinary
);
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
PopEvent
(
name
);
PopEvent
(
name
,
EventRole
::
kOrdinary
);
}
}
...
...
@@ -109,7 +109,7 @@ TEST(RecordEvent, RecordEvent) {
// Bad Usage:
PushEvent
(
"event_without_pop"
,
EventRole
::
kOrdinary
);
PopEvent
(
"event_without_push"
);
PopEvent
(
"event_without_push"
,
EventRole
::
kOrdinary
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
GetAllEvents
();
int
cuda_startup_count
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录