Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5e90f5e1
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5e90f5e1
编写于
1月 08, 2018
作者:
Y
Yibing Liu
提交者:
GitHub
1月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7043 from kuke/profiler_tool
Add the parsing part for the profiling tool
上级
c62383e9
6416eee7
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
238 addition
and
21 deletion
+238
-21
paddle/platform/profiler.cc
paddle/platform/profiler.cc
+170
-14
paddle/platform/profiler.h
paddle/platform/profiler.h
+30
-2
paddle/platform/profiler_test.cc
paddle/platform/profiler_test.cc
+38
-5
未找到文件。
paddle/platform/profiler.cc
浏览文件 @
5e90f5e1
...
...
@@ -13,12 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/profiler.h"
#include <iomanip>
#include <map>
#include "glog/logging.h"
namespace
paddle
{
namespace
platform
{
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// To record which timer the profiler used, CUDA or CPU.
static
std
::
string
g_profiler_place
=
""
;
// The thread local event list only can be accessed by the specific thread
// The thread index of each thread
static
thread_local
int32_t
g_thread_id
;
...
...
@@ -43,10 +48,7 @@ inline uint64_t GetTimeInNsec() {
Event
::
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
std
::
move
(
name
)),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
cuda_dev_ctx
=
static_cast
<
const
CUDADeviceContext
*>
(
dev_ctx
);
if
(
cuda_dev_ctx
)
{
...
...
@@ -72,11 +74,11 @@ std::string Event::kind() const {
PADDLE_THROW
(
"Unknown EventKind."
);
}
double
Event
::
CpuElapsed
U
s
(
const
Event
&
e
)
const
{
return
(
e
.
cpu_ns_
-
cpu_ns_
)
/
(
1000.0
);
double
Event
::
CpuElapsed
M
s
(
const
Event
&
e
)
const
{
return
(
e
.
cpu_ns_
-
cpu_ns_
)
/
(
1000
000
.0
);
}
double
Event
::
CudaElapsed
U
s
(
const
Event
&
e
)
const
{
double
Event
::
CudaElapsed
M
s
(
const
Event
&
e
)
const
{
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE
(
e
.
has_cuda
()
&&
has_cuda
());
PADDLE_ENFORCE
(
e
.
device
()
==
device
());
...
...
@@ -84,7 +86,7 @@ double Event::CudaElapsedUs(const Event& e) const {
PADDLE_ENFORCE
(
cudaEventSynchronize
(
e
.
event
()));
float
ms
;
PADDLE_ENFORCE
(
cudaEventElapsedTime
(
&
ms
,
event_
,
e
.
event
()));
return
ms
*
1000.0
;
return
ms
;
#else
PADDLE_THROW
(
"CUDA is not enabled"
);
#endif
...
...
@@ -113,21 +115,27 @@ inline EventList& GetEventList() {
}
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kMark
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
EventKind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
dev_ctx_
=
dev_ctx
;
GetEventList
().
Record
(
EventKind
::
kPushRange
,
std
::
move
(
name
),
g_thread_id
,
dev_ctx_
);
name_
=
name
;
PushEvent
(
name_
,
dev_ctx_
);
}
RecordEvent
::~
RecordEvent
()
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
GetEventList
().
Record
(
EventKind
::
kPopRange
,
std
::
string
(),
g_thread_id
,
dev_ctx_
);
PopEvent
(
name_
,
dev_ctx_
);
}
void
EnableProfiler
(
ProfilerState
state
)
{
...
...
@@ -138,6 +146,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling "
,
"EnableProfiler."
);
g_state
=
state
;
g_profiler_place
=
(
g_state
==
ProfilerState
::
kCUDA
)
?
"CUDA"
:
"CPU"
;
#ifdef PADDLE_WITH_CUDA
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
// Generate some dummy evenets first to reduce the startup overhead.
...
...
@@ -169,5 +178,152 @@ std::vector<std::vector<Event>> DisableProfiler() {
return
result
;
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
EventSortingKey
sorted_by
)
{
if
(
g_profiler_place
==
""
)
return
;
std
::
string
sorted_domain
;
std
::
function
<
bool
(
EventItem
&
,
EventItem
&
)
>
sorted_func
;
switch
(
sorted_by
)
{
case
EventSortingKey
::
kCalls
:
sorted_domain
=
"number of calls"
;
sorted_func
=
[](
EventItem
&
a
,
EventItem
&
b
)
{
return
a
.
calls
>
b
.
calls
;
};
break
;
case
EventSortingKey
::
kTotal
:
sorted_domain
=
"total time"
;
sorted_func
=
[](
EventItem
&
a
,
EventItem
&
b
)
{
return
a
.
total_time
>
b
.
total_time
;
};
break
;
case
EventSortingKey
::
kMin
:
sorted_domain
=
"minimum time"
;
sorted_func
=
[](
EventItem
&
a
,
EventItem
&
b
)
{
return
a
.
min_time
>
b
.
min_time
;
};
break
;
case
EventSortingKey
::
kMax
:
sorted_domain
=
"maximum time"
;
sorted_func
=
[](
EventItem
&
a
,
EventItem
&
b
)
{
return
a
.
max_time
>
b
.
max_time
;
};
break
;
case
EventSortingKey
::
kAve
:
sorted_domain
=
"average time"
;
sorted_func
=
[](
EventItem
&
a
,
EventItem
&
b
)
{
return
a
.
ave_time
>
b
.
ave_time
;
};
break
;
default:
sorted_domain
=
"event end time"
;
}
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
size_t
max_name_width
=
0
;
for
(
size_t
i
=
0
;
i
<
events
.
size
();
i
++
)
{
std
::
list
<
Event
>
pushed_events
;
std
::
vector
<
EventItem
>
event_items
;
std
::
unordered_map
<
std
::
string
,
int
>
event_idx
;
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
j
++
)
{
if
(
events
[
i
][
j
].
kind
()
==
"push"
)
{
pushed_events
.
push_back
(
events
[
i
][
j
]);
}
else
if
(
events
[
i
][
j
].
kind
()
==
"pop"
)
{
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
()
&&
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
++
rit
;
}
if
(
rit
!=
pushed_events
.
rend
())
{
double
event_time
=
(
g_profiler_place
==
"CUDA"
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
std
::
string
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
max_name_width
=
std
::
max
(
max_name_width
,
event_name
.
size
());
if
(
event_idx
.
find
(
event_name
)
==
event_idx
.
end
())
{
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event_time
};
event_items
.
push_back
(
event_item
);
}
else
{
int
index
=
event_idx
[
event_name
];
event_items
[
index
].
calls
+=
1
;
// total time
event_items
[
index
].
total_time
+=
event_time
;
// min time
event_items
[
index
].
min_time
=
std
::
min
(
event_time
,
event_items
[
index
].
min_time
);
// max time
event_items
[
index
].
max_time
=
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
}
// remove the push marker from the list
pushed_events
.
erase
((
++
rit
).
base
());
}
else
{
LOG
(
WARNING
)
<<
"Cannot find the push marker of event
\'
"
<<
events
[
i
][
j
].
name
()
<<
"
\'
, which will be ignored in profiling report."
;
}
}
}
// average time
for
(
auto
&
item
:
event_items
)
{
item
.
ave_time
=
item
.
total_time
/
item
.
calls
;
}
// sort
if
(
sorted_by
!=
EventSortingKey
::
kDefault
)
{
std
::
sort
(
event_items
.
begin
(),
event_items
.
end
(),
sorted_func
);
}
events_table
.
push_back
(
event_items
);
// log warning if there are events with `push` but without `pop`
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
())
{
LOG
(
WARNING
)
<<
"Cannot find the pop marker of event
\'
"
<<
rit
->
name
()
<<
"
\'
, which will be ignored in profiling report."
;
++
rit
;
}
}
// Print report
PrintProfilingReport
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
);
}
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
)
{
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
cout
<<
"Place: "
<<
g_profiler_place
<<
std
::
endl
;
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
cout
<<
"Sorted by "
<<
sorted_domain
<<
" in descending order in the same thread
\n\n
"
;
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
}
}
// namespace platform
}
// namespace paddle
paddle/platform/profiler.h
浏览文件 @
5e90f5e1
...
...
@@ -33,6 +33,7 @@ class Event {
std
::
string
kind
()
const
;
std
::
string
name
()
const
{
return
name_
;
}
uint32_t
thread_id
()
const
{
return
thread_id_
;
}
bool
has_cuda
()
const
{
return
has_cuda_
;
}
#ifdef PADDLE_WITH_CUDA
...
...
@@ -40,8 +41,8 @@ class Event {
int
device
()
const
{
return
device_
;
}
#endif
double
CpuElapsed
U
s
(
const
Event
&
e
)
const
;
double
CudaElapsed
U
s
(
const
Event
&
e
)
const
;
double
CpuElapsed
M
s
(
const
Event
&
e
)
const
;
double
CudaElapsed
M
s
(
const
Event
&
e
)
const
;
private:
EventKind
kind_
;
...
...
@@ -94,6 +95,10 @@ enum ProfilerState {
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
struct
RecordEvent
{
explicit
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
...
...
@@ -101,6 +106,8 @@ struct RecordEvent {
// The device context is used by Event to get the current cuda stream.
DeviceContext
*
dev_ctx_
;
// Event name
std
::
string
name_
;
};
// Enable the profiling function.
...
...
@@ -110,5 +117,26 @@ void EnableProfiler(ProfilerState state);
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std
::
vector
<
std
::
vector
<
Event
>>
DisableProfiler
();
// The information of each event given in the profiling report
struct
EventItem
{
std
::
string
name
;
int
calls
;
double
total_time
;
double
min_time
;
double
max_time
;
double
ave_time
;
};
// Candidate keys to sort the profiling report
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
// Parse the event list and output the profiling report
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
);
// Print results
void
PrintProfilingReport
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
);
}
// namespace platform
}
// namespace paddle
paddle/platform/profiler_test.cc
浏览文件 @
5e90f5e1
...
...
@@ -26,7 +26,7 @@ TEST(Event, CpuElapsedTime) {
counter
++
;
}
Event
stop_event
(
EventKind
::
kPopRange
,
"test"
,
0
,
nullptr
);
EXPECT_GT
(
start_event
.
CpuElapsed
U
s
(
stop_event
),
0
);
EXPECT_GT
(
start_event
.
CpuElapsed
M
s
(
stop_event
),
0
);
}
#ifdef PADDLE_WITH_CUDA
...
...
@@ -45,7 +45,7 @@ TEST(Event, CudaElapsedTime) {
counter
++
;
}
Event
stop_event
(
EventKind
::
kPopRange
,
"test"
,
0
,
dev_ctx
);
EXPECT_GT
(
start_event
.
CudaElapsed
U
s
(
stop_event
),
0
);
EXPECT_GT
(
start_event
.
CudaElapsed
M
s
(
stop_event
),
0
);
}
#endif
...
...
@@ -55,6 +55,7 @@ TEST(RecordEvent, RecordEvent) {
using
paddle
::
platform
::
EventKind
;
using
paddle
::
platform
::
RecordEvent
;
using
paddle
::
platform
::
ProfilerState
;
using
paddle
::
platform
::
EventSortingKey
;
ProfilerState
state
=
ProfilerState
::
kCPU
;
DeviceContext
*
dev_ctx
=
nullptr
;
...
...
@@ -67,13 +68,45 @@ TEST(RecordEvent, RecordEvent) {
#endif
EnableProfiler
(
state
);
/* Usage 1:
* PushEvent(evt_name, dev_ctx);
* ...
* code to be analyzed
* ...
* PopEvent(evt_name, dev_ctx);
*/
for
(
int
loop
=
0
;
loop
<
3
;
++
loop
)
{
for
(
int
i
=
1
;
i
<
5
;
++
i
)
{
std
::
string
name
=
"op_"
+
std
::
to_string
(
i
);
PushEvent
(
name
,
dev_ctx
);
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
PopEvent
(
name
,
dev_ctx
);
}
}
/* Usage 2:
* {
* RecordEvent record_event(name, dev_ctx);
* ...
* code to be analyzed
* ...
* }
*/
for
(
int
i
=
1
;
i
<
5
;
++
i
)
{
std
::
string
name
=
"evs_op_"
+
std
::
to_string
(
i
);
RecordEvent
record_event
(
name
,
dev_ctx
);
int
counter
=
1
;
while
(
counter
!=
i
*
1000
)
counter
++
;
}
// Bad Usage:
PushEvent
(
"event_without_pop"
,
dev_ctx
);
PopEvent
(
"event_without_push"
,
dev_ctx
);
std
::
vector
<
std
::
vector
<
Event
>>
events
=
paddle
::
platform
::
DisableProfiler
();
// Will remove parsing-related code from test later
ParseEvents
(
events
,
EventSortingKey
::
kTotal
);
int
cuda_startup_count
=
0
;
int
start_profiler_count
=
0
;
int
stop_profiler_count
=
0
;
...
...
@@ -85,9 +118,9 @@ TEST(RecordEvent, RecordEvent) {
if
(
events
[
i
][
j
].
name
()
==
"push"
)
{
EXPECT_EQ
(
events
[
i
][
j
+
1
].
name
(),
"pop"
);
#ifdef PADDLE_WITH_CUDA
EXPECT_GT
(
events
[
i
][
j
].
CudaElapsed
U
s
(
events
[
i
][
j
+
1
]),
0
);
EXPECT_GT
(
events
[
i
][
j
].
CudaElapsed
M
s
(
events
[
i
][
j
+
1
]),
0
);
#else
EXPECT_GT
(
events
[
i
][
j
].
CpuElapsed
U
s
(
events
[
i
][
j
+
1
]),
0
);
EXPECT_GT
(
events
[
i
][
j
].
CpuElapsed
M
s
(
events
[
i
][
j
+
1
]),
0
);
#endif
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录