Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
8456c3f4
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8456c3f4
编写于
3月 02, 2020
作者:
W
wangchaochaohu
提交者:
GitHub
3月 02, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
polish the profiler_help code (#22811)
上级
2fd1ec1e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
80 addition
and
72 deletion
+80
-72
paddle/fluid/platform/profiler_helper.h
paddle/fluid/platform/profiler_helper.h
+80
-72
未找到文件。
paddle/fluid/platform/profiler_helper.h
浏览文件 @
8456c3f4
...
...
@@ -394,6 +394,38 @@ void GetChildMap(const std::multimap<std::string, EventItem> &sub_child_map,
}
}
void
PrintOverHead
(
const
OverHead
&
overhead
,
const
size_t
data_width
)
{
double
compute_time
=
overhead
.
total_time
*
overhead
.
compute_ratio
;
double
framework_time
=
overhead
.
total_time
*
overhead
.
framework_ratio
;
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
"Total time: "
<<
overhead
.
total_time
<<
std
::
endl
;
std
::
cout
<<
std
::
setw
(
25
)
<<
" Computation time"
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
compute_time
<<
"Ratio: "
<<
overhead
.
compute_ratio
*
100
<<
"%"
<<
std
::
endl
;
std
::
cout
<<
std
::
setw
(
25
)
<<
" Framework overhead"
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
framework_time
<<
"Ratio: "
<<
overhead
.
framework_ratio
*
100
<<
"%"
<<
std
::
endl
;
std
::
cout
<<
"
\n
-------------------------"
<<
" GpuMemCpy Summary "
<<
"-------------------------
\n\n
"
;
std
::
cout
<<
std
::
setw
(
25
)
<<
"GpuMemcpy"
<<
"Calls: "
<<
std
::
setw
(
data_width
)
<<
overhead
.
memcpy_item
.
calls
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
overhead
.
memcpy_item
.
total_time
<<
"Ratio: "
<<
overhead
.
memcpy_item
.
ratio
*
100
<<
"%"
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
overhead
.
sub_memcpy_items
.
size
();
++
i
)
{
EventItem
item
=
overhead
.
sub_memcpy_items
[
i
];
if
(
item
.
calls
!=
0
)
{
std
::
cout
<<
std
::
setw
(
25
)
<<
" "
+
item
.
name
<<
"Calls: "
<<
std
::
setw
(
data_width
)
<<
item
.
calls
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
item
.
total_time
<<
"Ratio: "
<<
item
.
ratio
*
100
<<
"%"
<<
std
::
endl
;
}
}
}
// Print results
void
PrintProfiler
(
const
std
::
vector
<
std
::
vector
<
EventItem
>>
&
events_table
,
const
std
::
multimap
<
std
::
string
,
EventItem
>
&
child_map
,
...
...
@@ -428,38 +460,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
<<
" in descending order in the same thread
\n\n
"
;
if
(
overhead
.
print
)
{
double
compute_time
=
overhead
.
total_time
*
overhead
.
compute_ratio
;
double
framework_time
=
overhead
.
total_time
*
overhead
.
framework_ratio
;
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
"Total time: "
<<
overhead
.
total_time
<<
std
::
endl
;
std
::
cout
<<
std
::
setw
(
25
)
<<
" Computation time"
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
compute_time
<<
"Ratio: "
<<
overhead
.
compute_ratio
*
100
<<
"%"
<<
std
::
endl
;
std
::
cout
<<
std
::
setw
(
25
)
<<
" Framework overhead"
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
framework_time
<<
"Ratio: "
<<
overhead
.
framework_ratio
*
100
<<
"%"
<<
std
::
endl
;
std
::
cout
<<
"
\n
-------------------------"
<<
" GpuMemCpy Summary "
<<
"-------------------------
\n\n
"
;
std
::
cout
<<
std
::
setw
(
25
)
<<
"GpuMemcpy"
<<
"Calls: "
<<
std
::
setw
(
data_width
)
<<
overhead
.
memcpy_item
.
calls
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
overhead
.
memcpy_item
.
total_time
<<
"Ratio: "
<<
overhead
.
memcpy_item
.
ratio
*
100
<<
"%"
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
overhead
.
sub_memcpy_items
.
size
();
++
i
)
{
EventItem
item
=
overhead
.
sub_memcpy_items
[
i
];
if
(
item
.
calls
!=
0
)
{
std
::
cout
<<
std
::
setw
(
25
)
<<
" "
+
item
.
name
<<
"Calls: "
<<
std
::
setw
(
data_width
)
<<
item
.
calls
<<
"Total: "
<<
std
::
setw
(
data_width
)
<<
item
.
total_time
<<
"Ratio: "
<<
item
.
ratio
*
100
<<
"%"
<<
std
::
endl
;
}
}
PrintOverHead
(
overhead
,
data_width
);
}
std
::
cout
<<
"
\n
-------------------------"
<<
" Event Summary "
...
...
@@ -522,37 +523,13 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
}
}
// Parse the event list and output the profiling report
void
ParseEvents
(
const
std
::
vector
<
std
::
vector
<
Event
>>
&
events
,
bool
merge_thread
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
if
(
merge_thread
&&
events
.
size
()
<
2
)
return
;
std
::
string
sorted_domain
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
sorted_func
=
SetSortedFunc
(
sorted_by
,
&
sorted_domain
);
const
std
::
vector
<
std
::
vector
<
Event
>>
*
analyze_events
;
std
::
vector
<
std
::
vector
<
Event
>>
merged_events_list
;
if
(
merge_thread
)
{
std
::
vector
<
Event
>
merged_events
;
for
(
size_t
i
=
0
;
i
<
events
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
++
j
)
{
merged_events
.
push_back
(
events
[
i
][
j
]);
}
}
merged_events_list
.
push_back
(
merged_events
);
analyze_events
=
&
merged_events_list
;
}
else
{
analyze_events
=
&
events
;
}
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
std
::
multimap
<
std
::
string
,
EventItem
>
child_map
;
size_t
max_name_width
=
0
;
OverHead
overhead
;
void
AnalyzeEvent
(
const
std
::
vector
<
std
::
vector
<
Event
>>
*
analyze_events
,
std
::
vector
<
std
::
vector
<
EventItem
>>
*
events_table
,
std
::
multimap
<
std
::
string
,
EventItem
>
*
child_map
,
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
,
EventSortingKey
sorted_by
,
size_t
*
max_name_width
,
OverHead
*
overhead
,
bool
merge_thread
)
{
for
(
size_t
i
=
0
;
i
<
(
*
analyze_events
).
size
();
i
++
)
{
double
total
=
0.
;
// the total time in one thread
std
::
list
<
Event
>
pushed_events
;
...
...
@@ -563,7 +540,7 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
for
(
size_t
j
=
0
;
j
<
(
*
analyze_events
)[
i
].
size
();
j
++
)
{
Event
analyze_event
=
(
*
analyze_events
)[
i
][
j
];
SetEvent
(
merge_thread
,
analyze_event
,
&
max_name_width
,
&
pushed_events
,
SetEvent
(
merge_thread
,
analyze_event
,
max_name_width
,
&
pushed_events
,
&
event_items
,
&
event_idx
);
}
...
...
@@ -593,7 +570,6 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
total
+=
event_items
[
j
].
total_time
;
}
}
// average time
for
(
auto
&
item
:
main_event_items
)
{
item
.
ave_time
=
item
.
total_time
/
item
.
calls
;
...
...
@@ -603,20 +579,18 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
it
->
second
.
ratio
=
it
->
second
.
total_time
/
total
;
it
->
second
.
ave_time
=
it
->
second
.
total_time
/
it
->
second
.
calls
;
}
// When multi-threaded, overhead is printed only if merge_thread is true
if
((
*
analyze_events
).
size
()
==
1
)
{
overhead
.
total_time
=
total
;
overhead
.
print
=
true
;
ComputeOverhead
(
sub_child_map
,
&
overhead
);
overhead
->
total_time
=
total
;
overhead
->
print
=
true
;
ComputeOverhead
(
sub_child_map
,
overhead
);
}
// sort
if
(
sorted_by
!=
EventSortingKey
::
kDefault
)
{
std
::
sort
(
main_event_items
.
begin
(),
main_event_items
.
end
(),
sorted_func
);
}
events_table
.
push_back
(
main_event_items
);
events_table
->
push_back
(
main_event_items
);
// log warning if there are events with `push` but without `pop`
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
())
{
...
...
@@ -625,9 +599,43 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
++
rit
;
}
GetChildMap
(
sub_child_map
,
&
child_map
);
GetChildMap
(
sub_child_map
,
child_map
);
}
}
// Parse the event list and output the profiling report
void
ParseEvents
(
const
std
::
vector
<
std
::
vector
<
Event
>>
&
events
,
bool
merge_thread
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
if
(
merge_thread
&&
events
.
size
()
<
2
)
return
;
std
::
string
sorted_domain
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
sorted_func
=
SetSortedFunc
(
sorted_by
,
&
sorted_domain
);
const
std
::
vector
<
std
::
vector
<
Event
>>
*
analyze_events
;
std
::
vector
<
std
::
vector
<
Event
>>
merged_events_list
;
if
(
merge_thread
)
{
std
::
vector
<
Event
>
merged_events
;
for
(
size_t
i
=
0
;
i
<
events
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
++
j
)
{
merged_events
.
push_back
(
events
[
i
][
j
]);
}
}
merged_events_list
.
push_back
(
merged_events
);
analyze_events
=
&
merged_events_list
;
}
else
{
analyze_events
=
&
events
;
}
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
std
::
multimap
<
std
::
string
,
EventItem
>
child_map
;
size_t
max_name_width
=
0
;
OverHead
overhead
;
AnalyzeEvent
(
analyze_events
,
&
events_table
,
&
child_map
,
sorted_func
,
sorted_by
,
&
max_name_width
,
&
overhead
,
merge_thread
);
// Print report
PrintProfiler
(
events_table
,
child_map
,
overhead
,
sorted_domain
,
max_name_width
+
8
,
12
,
merge_thread
,
0
,
0
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录