Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
df3b250c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2310
Star
20933
Fork
5423
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
df3b250c
编写于
1月 05, 2018
作者:
Y
Yibing Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix bad_alloc bug & refine code in profiler
上级
367a5c9e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
88 additions
and
72 deletions
+88
-72
paddle/platform/profiler.cc
paddle/platform/profiler.cc
+79
-71
paddle/platform/profiler.h
paddle/platform/profiler.h
+4
-0
paddle/platform/profiler_test.cc
paddle/platform/profiler_test.cc
+5
-1
未找到文件。
paddle/platform/profiler.cc
浏览文件 @
df3b250c
...
...
@@ -15,12 +15,16 @@ limitations under the License. */
#include "paddle/platform/profiler.h"
#include <iomanip>
#include <map>
#include "gflags/gflags.h"
#include "glog/logging.h"
namespace
paddle
{
namespace
platform
{
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// To record which timer the profiler used, CUDA or CPU.
static
std
::
string
g_profiler_place
=
""
;
// The thread local event list only can be accessed by the specific thread
// The thread index of each thread
static
thread_local
int32_t
g_thread_id
;
...
...
@@ -45,10 +49,7 @@ inline uint64_t GetTimeInNsec() {
Event
::
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
std
::
move
(
name
)),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
cuda_dev_ctx
=
static_cast
<
const
CUDADeviceContext
*>
(
dev_ctx
);
if
(
cuda_dev_ctx
)
{
...
...
@@ -115,22 +116,27 @@ inline EventList& GetEventList() {
}
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kMark
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
EventKind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
dev_ctx_
=
dev_ctx
;
name_
=
name
;
GetEventList
().
Record
(
EventKind
::
kPushRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx_
);
PushEvent
(
name_
,
dev_ctx_
);
}
RecordEvent
::~
RecordEvent
()
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
GetEventList
().
Record
(
EventKind
::
kPopRange
,
std
::
move
(
name_
),
g_thread_id
,
dev_ctx_
);
PopEvent
(
name_
,
dev_ctx_
);
}
void
EnableProfiler
(
ProfilerState
state
)
{
...
...
@@ -141,6 +147,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling "
,
"EnableProfiler."
);
g_state
=
state
;
g_profiler_place
=
(
g_state
==
ProfilerState
::
kCUDA
)
?
"CUDA"
:
"CPU"
;
#ifdef PADDLE_WITH_CUDA
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
// Generate some dummy events first to reduce the startup overhead.
...
...
@@ -172,56 +179,8 @@ std::vector<std::vector<Event>> DisableProfiler() {
return
result
;
}
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
EventSortingKey
sorted_by
)
{
// Output header information
std
::
cout
<<
"------------------------->"
<<
" Profiling Report "
<<
"<-------------------------"
<<
"
\n\n
"
;
#ifdef PADDLE_WITH_CUDA
std
::
cout
<<
"Place: GPU"
<<
std
::
endl
;
#else
std
::
cout
<<
"Place: CPU"
<<
std
::
endl
;
#endif
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
string
sort_domain
=
"event end time"
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
sort_domain
=
"number of calls"
;
break
;
case
EventSortingKey
::
kTotal
:
sort_domain
=
"total time"
;
break
;
case
EventSortingKey
::
kMin
:
sort_domain
=
"minimum time"
;
break
;
case
EventSortingKey
::
kMax
:
sort_domain
=
"maximum time"
;
break
;
case
EventSortingKey
::
kAve
:
sort_domain
=
"average time"
;
break
;
default:
if
(
sorted_by
!=
EventSortingKey
::
kDefault
)
{
std
::
cout
<<
"Warning: unkown sorting key. "
;
sorted_by
=
EventSortingKey
::
kDefault
;
}
}
std
::
cout
<<
"Sorted by "
<<
sort_domain
<<
" in descending order in the same thread
\n\n
"
;
// Parse events
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
size_t
max_name_width
=
0
;
for
(
size_t
i
=
0
;
i
<
events
.
size
();
i
++
)
{
...
...
@@ -234,19 +193,19 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
pushed_events
.
push_back
(
events
[
i
][
j
]);
}
else
if
(
events
[
i
][
j
].
kind
()
==
"pop"
)
{
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
->
name
()
!=
events
[
i
][
j
].
name
()
&&
rit
!=
pushed_events
.
rend
())
{
while
(
rit
!=
pushed_events
.
rend
()
&&
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
++
rit
;
}
if
(
rit
!=
pushed_events
.
rend
())
{
#ifdef PADDLE_WITH_CUDA
double
event_time
=
rit
->
CudaElapsedMs
(
events
[
i
][
j
]);
#else
double
event_time
=
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
#endif
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
std
::
string
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
max_name_width
=
std
::
max
(
max_name_width
,
event_name
.
size
());
if
(
event_idx
.
find
(
event_name
)
==
event_idx
.
end
())
{
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
...
...
@@ -264,11 +223,13 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
event_items
[
index
].
max_time
=
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
}
// remove the start marker from the list
pushed_events
.
erase
((
++
rit
).
base
());
}
else
{
std
::
cout
<<
"Warning: can not find the start marker of event "
<<
events
[
i
][
j
].
name
();
LOG
(
WARNING
)
<<
"Cannot find the push marker of event
\'
"
<<
events
[
i
][
j
].
name
()
<<
"
\'
, which will be ignored in profiling report."
;
}
}
}
...
...
@@ -294,19 +255,65 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
}
});
}
events_table
.
push_back
(
event_items
);
// To check whether there are events with `push` but without `pop`
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
())
{
if
(
rit
->
kind
()
==
"push"
)
{
LOG
(
WARNING
)
<<
"Cannot find the pop marker of event
\'
"
<<
rit
->
name
()
<<
"
\'
, which will be ignored in profiling report."
;
}
++
rit
;
}
}
// output events table
// Print report
PrintProfilingReport
(
events_table
,
sorted_by
,
max_name_width
+
4
,
12
);
}
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
EventSortingKey
sorted_by
,
const
size_t
name_width
,
const
size_t
data_width
)
{
if
(
g_profiler_place
==
""
)
return
;
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
cout
<<
"Place: "
<<
g_profiler_place
<<
std
::
endl
;
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
string
sort_domain
=
"event end time"
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
sort_domain
=
"number of calls"
;
break
;
case
EventSortingKey
::
kTotal
:
sort_domain
=
"total time"
;
break
;
case
EventSortingKey
::
kMin
:
sort_domain
=
"minimum time"
;
break
;
case
EventSortingKey
::
kMax
:
sort_domain
=
"maximum time"
;
break
;
case
EventSortingKey
::
kAve
:
sort_domain
=
"average time"
;
break
;
default:
break
;
}
std
::
cout
<<
"Sorted by "
<<
sort_domain
<<
" in descending order in the same thread
\n\n
"
;
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
const
int
data_width
=
12
;
std
::
cout
<<
std
::
setw
(
max_name_width
+
4
)
<<
"Event"
<<
std
::
setw
(
data_width
)
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
max_name_width
+
4
)
<<
event_item
.
name
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
...
...
@@ -314,6 +321,7 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
}
}
// namespace platform
...
...
paddle/platform/profiler.h
浏览文件 @
df3b250c
...
...
@@ -134,5 +134,9 @@ enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
);
// Print results
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
EventSortingKey
sorted_by
,
const
size_t
name_width
,
const
size_t
data_width
);
}
// namespace platform
}
// namespace paddle
paddle/platform/profiler_test.cc
浏览文件 @
df3b250c
...
...
@@ -99,8 +99,12 @@ TEST(RecordEvent, RecordEvent) {
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
}
// Bad Usage:
PushEvent
(
"event_without_pop"
,
dev_ctx
);
PopEvent
(
"event_without_push"
,
dev_ctx
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
DisableProfiler
();
// Will remove
from test before merging
// Will remove
parsing-related code from test later
ParseEvents
(
events
,
EventSortingKey
::
kTotal
);
int
cuda_startup_count
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录