Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
df3b250c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
df3b250c
编写于
1月 05, 2018
作者:
Y
Yibing Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix bad_alloc bug & refine code in profiler
上级
367a5c9e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
88 additions
and
72 deletions
+88
-72
paddle/platform/profiler.cc
paddle/platform/profiler.cc
+79
-71
paddle/platform/profiler.h
paddle/platform/profiler.h
+4
-0
paddle/platform/profiler_test.cc
paddle/platform/profiler_test.cc
+5
-1
未找到文件。
paddle/platform/profiler.cc
浏览文件 @
df3b250c
...
...
@@ -15,12 +15,16 @@ limitations under the License. */
#include "paddle/platform/profiler.h"
#include <iomanip>
#include <map>
#include "gflags/gflags.h"
#include "glog/logging.h"
namespace
paddle
{
namespace
platform
{
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// To record which timer the profiler used, CUDA or CPU.
static
std
::
string
g_profiler_place
=
""
;
// The thread-local event list can only be accessed by the thread that owns it
// The thread index of each thread
static
thread_local
int32_t
g_thread_id
;
...
...
@@ -45,10 +49,7 @@ inline uint64_t GetTimeInNsec() {
Event
::
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
std
::
move
(
name
)),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
cuda_dev_ctx
=
static_cast
<
const
CUDADeviceContext
*>
(
dev_ctx
);
if
(
cuda_dev_ctx
)
{
...
...
@@ -115,22 +116,27 @@ inline EventList& GetEventList() {
}
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kMark
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
EventKind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
// Records a kPushRange event named `name` in the list returned by
// GetEventList(), tagged with the thread-local g_thread_id and `dev_ctx`.
// Note: this function does not check g_state itself; RecordEvent's
// constructor performs that check before calling it.
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
// Records a kPopRange event named `name` in the list returned by
// GetEventList(), tagged with the thread-local g_thread_id and `dev_ctx`.
// Note: this function does not check g_state itself; RecordEvent's
// destructor performs that check before calling it.
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
dev_ctx_
=
dev_ctx
;
name_
=
name
;
GetEventList
().
Record
(
EventKind
::
kPushRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx_
);
PushEvent
(
name_
,
dev_ctx_
);
}
RecordEvent
::~
RecordEvent
()
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
GetEventList
().
Record
(
EventKind
::
kPopRange
,
std
::
move
(
name_
),
g_thread_id
,
dev_ctx_
);
PopEvent
(
name_
,
dev_ctx_
);
}
void
EnableProfiler
(
ProfilerState
state
)
{
...
...
@@ -141,6 +147,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling "
,
"EnableProfiler."
);
g_state
=
state
;
g_profiler_place
=
(
g_state
==
ProfilerState
::
kCUDA
)
?
"CUDA"
:
"CPU"
;
#ifdef PADDLE_WITH_CUDA
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
// Generate some dummy events first to reduce the startup overhead.
...
...
@@ -172,56 +179,8 @@ std::vector<std::vector<Event>> DisableProfiler() {
return
result
;
}
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
EventSortingKey
sorted_by
)
{
// Output header information
std
::
cout
<<
"------------------------->"
<<
" Profiling Report "
<<
"<-------------------------"
<<
"
\n\n
"
;
#ifdef PADDLE_WITH_CUDA
std
::
cout
<<
"Place: GPU"
<<
std
::
endl
;
#else
std
::
cout
<<
"Place: CPU"
<<
std
::
endl
;
#endif
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
string
sort_domain
=
"event end time"
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
sort_domain
=
"number of calls"
;
break
;
case
EventSortingKey
::
kTotal
:
sort_domain
=
"total time"
;
break
;
case
EventSortingKey
::
kMin
:
sort_domain
=
"minimum time"
;
break
;
case
EventSortingKey
::
kMax
:
sort_domain
=
"maximum time"
;
break
;
case
EventSortingKey
::
kAve
:
sort_domain
=
"average time"
;
break
;
default:
if
(
sorted_by
!=
EventSortingKey
::
kDefault
)
{
std
::
cout
<<
"Warning: unkown sorting key. "
;
sorted_by
=
EventSortingKey
::
kDefault
;
}
}
std
::
cout
<<
"Sorted by "
<<
sort_domain
<<
" in descending order in the same thread
\n\n
"
;
// Parse events
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
size_t
max_name_width
=
0
;
for
(
size_t
i
=
0
;
i
<
events
.
size
();
i
++
)
{
...
...
@@ -234,19 +193,19 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
pushed_events
.
push_back
(
events
[
i
][
j
]);
}
else
if
(
events
[
i
][
j
].
kind
()
==
"pop"
)
{
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
->
name
()
!=
events
[
i
][
j
].
name
()
&&
rit
!=
pushed_events
.
rend
())
{
while
(
rit
!=
pushed_events
.
rend
()
&&
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
++
rit
;
}
if
(
rit
!=
pushed_events
.
rend
())
{
#ifdef PADDLE_WITH_CUDA
double
event_time
=
rit
->
CudaElapsedMs
(
events
[
i
][
j
]);
#else
double
event_time
=
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
#endif
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
std
::
string
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
max_name_width
=
std
::
max
(
max_name_width
,
event_name
.
size
());
if
(
event_idx
.
find
(
event_name
)
==
event_idx
.
end
())
{
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
...
...
@@ -264,11 +223,13 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
event_items
[
index
].
max_time
=
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
}
// remove the start marker from the list
pushed_events
.
erase
((
++
rit
).
base
());
}
else
{
std
::
cout
<<
"Warning: can not find the start marker of event "
<<
events
[
i
][
j
].
name
();
LOG
(
WARNING
)
<<
"Cannot find the push marker of event
\'
"
<<
events
[
i
][
j
].
name
()
<<
"
\'
, which will be ignored in profiling report."
;
}
}
}
...
...
@@ -294,19 +255,65 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
}
});
}
events_table
.
push_back
(
event_items
);
// To check whether there are events with `push` but without `pop`
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
())
{
if
(
rit
->
kind
()
==
"push"
)
{
LOG
(
WARNING
)
<<
"Cannot find the pop marker of event
\'
"
<<
rit
->
name
()
<<
"
\'
, which will be ignored in profiling report."
;
}
++
rit
;
}
}
// output events table
// Print report
PrintProfilingReport
(
events_table
,
sorted_by
,
max_name_width
+
4
,
12
);
}
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
EventSortingKey
sorted_by
,
const
size_t
name_width
,
const
size_t
data_width
)
{
if
(
g_profiler_place
==
""
)
return
;
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
cout
<<
"Place: "
<<
g_profiler_place
<<
std
::
endl
;
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
string
sort_domain
=
"event end time"
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
sort_domain
=
"number of calls"
;
break
;
case
EventSortingKey
::
kTotal
:
sort_domain
=
"total time"
;
break
;
case
EventSortingKey
::
kMin
:
sort_domain
=
"minimum time"
;
break
;
case
EventSortingKey
::
kMax
:
sort_domain
=
"maximum time"
;
break
;
case
EventSortingKey
::
kAve
:
sort_domain
=
"average time"
;
break
;
default:
break
;
}
std
::
cout
<<
"Sorted by "
<<
sort_domain
<<
" in descending order in the same thread
\n\n
"
;
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
const
int
data_width
=
12
;
std
::
cout
<<
std
::
setw
(
max_name_width
+
4
)
<<
"Event"
<<
std
::
setw
(
data_width
)
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
max_name_width
+
4
)
<<
event_item
.
name
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
...
...
@@ -314,6 +321,7 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
}
}
// namespace platform
...
...
paddle/platform/profiler.h
浏览文件 @
df3b250c
...
...
@@ -134,5 +134,9 @@ enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
);
// Print the profiling report for `events_table` to std::cout.
//   events_table: one list of aggregated EventItem entries per outer element.
//   sorted_by:    sorting key; used to choose the "Sorted by ..." header text.
//   name_width:   column width (std::setw) of the event-name column.
//   data_width:   column width (std::setw) of each numeric column
//                 (Calls, Total, Min., Max., Ave.).
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
EventSortingKey
sorted_by
,
const
size_t
name_width
,
const
size_t
data_width
);
}
// namespace platform
}
// namespace paddle
paddle/platform/profiler_test.cc
浏览文件 @
df3b250c
...
...
@@ -99,8 +99,12 @@ TEST(RecordEvent, RecordEvent) {
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
}
// Bad Usage:
PushEvent
(
"event_without_pop"
,
dev_ctx
);
PopEvent
(
"event_without_push"
,
dev_ctx
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
DisableProfiler
();
// Will remove
from test before merging
// Will remove
parsing-related code from test later
ParseEvents
(
events
,
EventSortingKey
::
kTotal
);
int
cuda_startup_count
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录