Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
dbfe5333
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
dbfe5333
编写于
5月 25, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
5月 25, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add pe profiler Event (#24611)
上级
55b664a1
变更
8
显示空白变更内容
内联
并排
Showing
8 changed files
with
98 additions
and
18 deletions
+98
-18
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+3
-0
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+40
-3
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+1
-1
paddle/fluid/platform/event.h
paddle/fluid/platform/event.h
+1
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+4
-5
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+1
-1
paddle/fluid/platform/profiler_helper.h
paddle/fluid/platform/profiler_helper.h
+46
-6
paddle/fluid/platform/profiler_test.cc
paddle/fluid/platform/profiler_test.cc
+2
-2
未找到文件。
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
dbfe5333
...
...
@@ -31,6 +31,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_double
(
eager_delete_tensor_gb
);
...
...
@@ -820,6 +821,8 @@ void ParallelExecutor::BCastParamsToDevices(
FetchResultType
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
bool
return_merged
)
{
VLOG
(
3
)
<<
"enter ParallelExecutor Run"
;
platform
::
RecordEvent
parallel_executor_event
(
"ParallelExecutor::Run"
,
paddle
::
platform
::
EventRole
::
kSpecial
);
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
...
...
paddle/fluid/platform/device_tracer.cc
浏览文件 @
dbfe5333
...
...
@@ -40,6 +40,9 @@ namespace {
thread_local
std
::
deque
<
int
>
block_id_stack
;
// Tracking the nested event stacks.
thread_local
std
::
deque
<
Event
*>
annotation_stack
;
// Stack to store events such as PE and so on
static
std
::
deque
<
Event
*>
main_thread_annotation_stack
{};
static
std
::
deque
<
std
::
string
>
main_thread_annotation_stack_name
{};
std
::
map
<
uint32_t
,
int32_t
>
system_thread_id_map
;
...
...
@@ -638,15 +641,49 @@ DeviceTracer *GetDeviceTracer() {
return
tracer
;
}
void
SetCurAnnotation
(
Event
*
event
)
{
if
(
!
annotation_stack
.
empty
())
{
std
::
string
SetCurAnnotation
(
Event
*
event
)
{
std
::
string
ret
;
if
(
!
annotation_stack
.
empty
()
&&
event
->
role
()
!=
EventRole
::
kSpecial
)
{
event
->
set_parent
(
annotation_stack
.
back
());
event
->
set_name
(
annotation_stack
.
back
()
->
name
()
+
"/"
+
event
->
name
());
}
annotation_stack
.
push_back
(
event
);
if
(
!
main_thread_annotation_stack_name
.
empty
()
&&
!
annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
thread_id
()
!=
annotation_stack
.
back
()
->
thread_id
())
{
ret
=
main_thread_annotation_stack_name
.
back
()
+
"/"
+
event
->
name
();
}
else
{
ret
=
event
->
name
();
}
if
(
event
->
role
()
==
EventRole
::
kSpecial
)
{
std
::
string
name
=
event
->
name
();
if
(
!
main_thread_annotation_stack_name
.
empty
())
{
name
=
main_thread_annotation_stack_name
.
back
()
+
"/"
+
event
->
name
();
}
main_thread_annotation_stack_name
.
push_back
(
name
);
main_thread_annotation_stack
.
push_back
(
event
);
}
return
ret
;
}
void
ClearCurAnnotation
()
{
annotation_stack
.
pop_back
();
}
void
ClearCurAnnotation
()
{
if
(
!
main_thread_annotation_stack_name
.
empty
()
&&
!
annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
thread_id
()
!=
annotation_stack
.
back
()
->
thread_id
())
{
annotation_stack
.
back
()
->
set_name
(
main_thread_annotation_stack_name
.
back
()
+
"/"
+
annotation_stack
.
back
()
->
name
());
}
if
(
!
main_thread_annotation_stack
.
empty
()
&&
main_thread_annotation_stack
.
back
()
->
name
()
==
annotation_stack
.
back
()
->
name
())
{
main_thread_annotation_stack_name
.
pop_back
();
main_thread_annotation_stack
.
pop_back
();
}
annotation_stack
.
pop_back
();
}
Event
*
CurAnnotation
()
{
if
(
annotation_stack
.
empty
())
return
nullptr
;
...
...
paddle/fluid/platform/device_tracer.h
浏览文件 @
dbfe5333
...
...
@@ -137,7 +137,7 @@ class DeviceTracer {
DeviceTracer
*
GetDeviceTracer
();
// Set a name for the cuda kernel operation being launched by the thread.
void
SetCurAnnotation
(
Event
*
event
);
std
::
string
SetCurAnnotation
(
Event
*
event
);
// Clear the name after the operation is done.
void
ClearCurAnnotation
();
// Current name of the operation being run in the thread.
...
...
paddle/fluid/platform/event.h
浏览文件 @
dbfe5333
...
...
@@ -29,6 +29,7 @@ enum class EventRole {
kOrdinary
,
// only record op time with op type key
kInnerOp
,
// record op detail time with op type key
kUniqueOp
,
// record op detail time with op unique name key
kSpecial
,
// record an event, such as PE, that is outside the thread-local scope
};
class
Event
{
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
dbfe5333
...
...
@@ -73,8 +73,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
// lock is not needed, the code below is thread-safe
Event
*
e
=
PushEvent
(
name
,
role
);
// Maybe need the same push/pop behavior.
SetCurAnnotation
(
e
);
name_
=
e
->
name
();
name_
=
SetCurAnnotation
(
e
);
}
RecordEvent
::~
RecordEvent
()
{
...
...
@@ -86,7 +85,7 @@ RecordEvent::~RecordEvent() {
BlockDepth
(),
g_thread_id
);
}
ClearCurAnnotation
();
PopEvent
(
name_
);
PopEvent
(
name_
,
role_
);
}
void
MemEvenRecorder
::
PushMemRecord
(
const
void
*
ptr
,
const
Place
&
place
,
...
...
@@ -187,8 +186,8 @@ Event *PushEvent(const std::string &name, const EventRole role) {
return
GetEventList
().
Record
(
EventType
::
kPushRange
,
name
,
g_thread_id
,
role
);
}
void
PopEvent
(
const
std
::
string
&
name
)
{
GetEventList
().
Record
(
EventType
::
kPopRange
,
name
,
g_thread_id
);
void
PopEvent
(
const
std
::
string
&
name
,
const
EventRole
role
)
{
GetEventList
().
Record
(
EventType
::
kPopRange
,
name
,
g_thread_id
,
role
);
}
void
EnableProfiler
(
ProfilerState
state
)
{
PADDLE_ENFORCE_NE
(
state
,
ProfilerState
::
kDisabled
,
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
dbfe5333
...
...
@@ -197,7 +197,7 @@ void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
void
PopMemEvent
(
uint64_t
start_ns
,
uint64_t
end_ns
,
size_t
bytes
,
const
Place
&
place
,
const
std
::
string
&
annotation
);
Event
*
PushEvent
(
const
std
::
string
&
name
,
const
EventRole
role
);
void
PopEvent
(
const
std
::
string
&
name
);
void
PopEvent
(
const
std
::
string
&
name
,
const
EventRole
role
);
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
...
...
paddle/fluid/platform/profiler_helper.h
浏览文件 @
dbfe5333
...
...
@@ -22,12 +22,12 @@ limitations under the License. */
#include <memory>
#include <mutex> // NOLINT
#include <random>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif // PADDLE_WITH_CUDA
...
...
@@ -283,7 +283,8 @@ std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
void
SetEvent
(
bool
merge_thread
,
const
Event
&
analyze_event
,
size_t
*
max_name_width
,
std
::
list
<
Event
>
*
pushed_events
,
std
::
vector
<
EventItem
>
*
event_items
,
std
::
unordered_map
<
std
::
string
,
int
>
*
event_idx
)
{
std
::
unordered_map
<
std
::
string
,
int
>
*
event_idx
,
const
std
::
set
<
std
::
string
>
&
main_thread_event_name
)
{
if
(
analyze_event
.
type
()
==
EventType
::
kPushRange
)
{
pushed_events
->
push_back
(
analyze_event
);
}
else
if
(
analyze_event
.
type
()
==
EventType
::
kPopRange
)
{
...
...
@@ -312,10 +313,37 @@ void SetEvent(bool merge_thread, const Event &analyze_event,
std
::
string
event_name
;
if
(
merge_thread
)
{
event_name
=
rit
->
name
();
}
else
{
if
(
!
main_thread_event_name
.
empty
())
{
auto
origin_name
=
rit
->
name
();
int
index
=
1
;
int
split_pos
=
0
;
while
((
split_pos
=
FindNthReversePos
(
origin_name
,
'/'
,
index
))
!=
-
1
)
{
auto
prefix_str
=
origin_name
.
substr
(
0
,
split_pos
);
if
(
main_thread_event_name
.
count
(
prefix_str
))
{
break
;
}
index
++
;
}
if
(
split_pos
==
-
1
&&
!
main_thread_event_name
.
count
(
rit
->
name
()))
{
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
}
else
{
if
(
!
main_thread_event_name
.
count
(
rit
->
name
()))
{
event_name
=
origin_name
.
substr
(
0
,
split_pos
+
1
)
+
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
origin_name
.
substr
(
split_pos
+
1
,
origin_name
.
length
()
-
1
);
}
else
{
event_name
=
rit
->
name
();
}
}
}
else
{
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
}
}
auto
print_name_size
=
event_name
.
size
();
int
found_pos
=
0
;
if
(
rit
->
role
()
==
EventRole
::
kInnerOp
&&
...
...
@@ -608,6 +636,16 @@ void AnalyzeEvent(
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
,
EventSortingKey
sorted_by
,
size_t
*
max_name_width
,
OverHead
*
overhead
,
bool
merge_thread
)
{
// In order to deal with special events in the main thread
std
::
set
<
std
::
string
>
main_thread_event_name
;
for
(
size_t
i
=
0
;
i
<
(
*
analyze_events
).
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
(
*
analyze_events
)[
i
].
size
();
j
++
)
{
Event
event
=
(
*
analyze_events
)[
i
][
j
];
if
(
event
.
role
()
==
EventRole
::
kSpecial
)
{
main_thread_event_name
.
insert
(
event
.
name
());
}
}
}
for
(
size_t
i
=
0
;
i
<
(
*
analyze_events
).
size
();
i
++
)
{
double
total
=
0.
;
// the total time in one thread
std
::
list
<
Event
>
pushed_events
;
...
...
@@ -618,8 +656,10 @@ void AnalyzeEvent(
for
(
size_t
j
=
0
;
j
<
(
*
analyze_events
)[
i
].
size
();
j
++
)
{
Event
analyze_event
=
(
*
analyze_events
)[
i
][
j
];
if
(
!
(
analyze_event
.
role
()
==
EventRole
::
kSpecial
&&
!
merge_thread
))
{
SetEvent
(
merge_thread
,
analyze_event
,
max_name_width
,
&
pushed_events
,
&
event_items
,
&
event_idx
);
&
event_items
,
&
event_idx
,
main_thread_event_name
);
}
}
auto
table_size
=
event_items
.
size
();
...
...
paddle/fluid/platform/profiler_test.cc
浏览文件 @
dbfe5333
...
...
@@ -59,7 +59,7 @@ TEST(RecordEvent, RecordEvent) {
PushEvent
(
name
,
EventRole
::
kOrdinary
);
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
PopEvent
(
name
);
PopEvent
(
name
,
EventRole
::
kOrdinary
);
}
}
...
...
@@ -109,7 +109,7 @@ TEST(RecordEvent, RecordEvent) {
// Bad Usage:
PushEvent
(
"event_without_pop"
,
EventRole
::
kOrdinary
);
PopEvent
(
"event_without_push"
);
PopEvent
(
"event_without_push"
,
EventRole
::
kOrdinary
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
GetAllEvents
();
int
cuda_startup_count
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录