Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
add367c3
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
add367c3
编写于
4月 10, 2018
作者:
Q
qingqing01
提交者:
Yi Wang
4月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Code cleanup in the profiler code. (#9782)
上级
326b434a
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
133 addition
and
140 deletion
+133
-140
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+119
-75
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+5
-57
paddle/fluid/platform/profiler_test.cc
paddle/fluid/platform/profiler_test.cc
+9
-8
未找到文件。
paddle/fluid/platform/profiler.cc
浏览文件 @
add367c3
...
@@ -15,8 +15,11 @@ limitations under the License. */
...
@@ -15,8 +15,11 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include <sys/time.h>
#include <sys/time.h>
#include <time.h>
#include <time.h>
#include <algorithm>
#include <iomanip>
#include <iomanip>
#include <map>
#include <map>
#include <mutex> // NOLINT
#include <string>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#include <cuda.h>
#endif // PADDLE_WITH_CUDA
#endif // PADDLE_WITH_CUDA
...
@@ -28,10 +31,10 @@ limitations under the License. */
...
@@ -28,10 +31,10 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
struct
EventList
;
// The profiler state, the initial value is ProfilerState::kDisabled
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// To record which timer the profiler used, CUDA or CPU.
static
std
::
string
g_profiler_place
=
""
;
// The thread local event list only can be accessed by the specific thread
// The thread local event list only can be accessed by the specific thread
// The thread index of each thread
// The thread index of each thread
static
thread_local
int32_t
g_thread_id
;
static
thread_local
int32_t
g_thread_id
;
...
@@ -45,6 +48,39 @@ static std::list<std::shared_ptr<EventList>> g_all_event_lists;
...
@@ -45,6 +48,39 @@ static std::list<std::shared_ptr<EventList>> g_all_event_lists;
// The thread local event list only can be accessed by the specific thread
// The thread local event list only can be accessed by the specific thread
static
thread_local
std
::
shared_ptr
<
EventList
>
g_event_list
;
static
thread_local
std
::
shared_ptr
<
EventList
>
g_event_list
;
struct
EventList
{
constexpr
static
size_t
kMB
=
1024
*
1024
;
constexpr
static
size_t
kEventBlockSize
=
16
*
kMB
;
constexpr
static
size_t
kEventSize
=
sizeof
(
Event
);
constexpr
static
size_t
kEventAlign
=
alignof
(
Event
);
constexpr
static
size_t
kNumBlock
=
kEventBlockSize
/
((
kEventSize
+
kEventAlign
-
1
)
/
kEventAlign
*
kEventAlign
);
template
<
typename
...
Args
>
void
Record
(
Args
&&
...
args
)
{
if
(
event_blocks
.
empty
()
||
event_blocks
.
front
().
size
()
==
kNumBlock
)
{
event_blocks
.
emplace_front
();
event_blocks
.
front
().
reserve
(
kNumBlock
);
}
event_blocks
.
front
().
emplace_back
(
std
::
forward
<
Args
>
(
args
)...);
}
std
::
vector
<
Event
>
Reduce
()
{
std
::
vector
<
Event
>
result
;
for
(
auto
&
block
:
event_blocks
)
{
result
.
insert
(
result
.
begin
(),
std
::
make_move_iterator
(
block
.
begin
()),
std
::
make_move_iterator
(
block
.
end
()));
}
event_blocks
.
clear
();
return
result
;
}
void
Clear
()
{
event_blocks
.
clear
();
}
std
::
forward_list
<
std
::
vector
<
Event
>>
event_blocks
;
};
inline
uint64_t
GetTimeInNsec
()
{
inline
uint64_t
GetTimeInNsec
()
{
using
clock
=
std
::
conditional
<
std
::
chrono
::
high_resolution_clock
::
is_steady
,
using
clock
=
std
::
conditional
<
std
::
chrono
::
high_resolution_clock
::
is_steady
,
std
::
chrono
::
high_resolution_clock
,
std
::
chrono
::
high_resolution_clock
,
...
@@ -60,9 +96,9 @@ inline uint64_t PosixInNsec() {
...
@@ -60,9 +96,9 @@ inline uint64_t PosixInNsec() {
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
}
}
Event
::
Event
(
Event
Kind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
Event
::
Event
(
Event
Type
type
,
std
::
string
name
,
uint32_t
thread_id
,
const
DeviceContext
*
dev_ctx
)
const
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
:
type_
(
type
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
has_cuda_
=
dev_ctx
?
platform
::
is_gpu_place
(
dev_ctx
->
GetPlace
())
:
false
;
has_cuda_
=
dev_ctx
?
platform
::
is_gpu_place
(
dev_ctx
->
GetPlace
())
:
false
;
if
(
has_cuda_
)
{
if
(
has_cuda_
)
{
...
@@ -76,17 +112,7 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id,
...
@@ -76,17 +112,7 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id,
cpu_ns_
=
GetTimeInNsec
();
cpu_ns_
=
GetTimeInNsec
();
}
}
std
::
string
Event
::
kind
()
const
{
const
EventType
&
Event
::
type
()
const
{
return
type_
;
}
switch
(
kind_
)
{
case
EventKind
::
kMark
:
return
"mark"
;
case
EventKind
::
kPushRange
:
return
"push"
;
case
EventKind
::
kPopRange
:
return
"pop"
;
}
PADDLE_THROW
(
"Unknown EventKind."
);
}
double
Event
::
CpuElapsedMs
(
const
Event
&
e
)
const
{
double
Event
::
CpuElapsedMs
(
const
Event
&
e
)
const
{
return
(
e
.
cpu_ns_
-
cpu_ns_
)
/
(
1000000.0
);
return
(
e
.
cpu_ns_
-
cpu_ns_
)
/
(
1000000.0
);
...
@@ -129,15 +155,15 @@ inline EventList& GetEventList() {
...
@@ -129,15 +155,15 @@ inline EventList& GetEventList() {
}
}
void
Mark
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
void
Mark
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
}
void
PushEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
void
PushEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
}
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
...
@@ -197,12 +223,7 @@ void EnableProfiler(ProfilerState state) {
...
@@ -197,12 +223,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling "
,
"The profiling state should be disabled when calling "
,
"EnableProfiler."
);
"EnableProfiler."
);
g_state
=
state
;
g_state
=
state
;
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
if
(
g_state
==
ProfilerState
::
kAll
)
{
g_profiler_place
=
"CUDA"
;
}
else
if
(
g_state
==
ProfilerState
::
kCPU
)
{
g_profiler_place
=
"CPU"
;
}
else
{
g_profiler_place
=
"All"
;
GetDeviceTracer
()
->
Enable
();
GetDeviceTracer
()
->
Enable
();
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -240,27 +261,63 @@ std::vector<std::vector<Event>> GetAllEvents() {
...
@@ -240,27 +261,63 @@ std::vector<std::vector<Event>> GetAllEvents() {
return
result
;
return
result
;
}
}
void
DisableProfiler
(
EventSortingKey
sorted_key
,
// The information of each event given in the profiling report
const
std
::
string
&
profile_path
)
{
struct
EventItem
{
PADDLE_ENFORCE
(
g_state
!=
ProfilerState
::
kDisabled
,
std
::
string
name
;
"Can't disable profiling, since it's not starting."
);
int
calls
;
// Mark the profiling stop.
double
total_time
;
Mark
(
"_stop_profiler_"
,
nullptr
);
double
min_time
;
g_state
=
ProfilerState
::
kDisabled
;
double
max_time
;
double
ave_time
;
};
// Print results
void
PrintProfiler
(
const
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
const
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
)
{
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
string
place
;
if
(
g_state
==
ProfilerState
::
kCPU
)
{
place
=
"CPU"
;
}
else
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
place
=
"CUDA"
;
}
else
if
(
g_state
==
ProfilerState
::
kAll
)
{
place
=
"All"
;
}
else
{
PADDLE_THROW
(
"Invalid profiler state"
);
}
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
std
::
cout
<<
"Place: "
<<
place
<<
std
::
endl
;
ParseEvents
(
all_events
,
sorted_key
);
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
ResetProfiler
();
std
::
cout
<<
"Sorted by "
<<
sorted_domain
DeviceTracer
*
tracer
=
GetDeviceTracer
();
<<
" in descending order in the same thread
\n\n
"
;
if
(
g_profiler_place
==
"All"
&&
tracer
&&
tracer
->
IsEnabled
())
{
// Output events table
tracer
->
Disable
();
std
::
cout
.
setf
(
std
::
ios
::
left
);
tracer
->
GenProfile
(
profile_path
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
}
std
::
cout
<<
std
::
endl
;
}
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
// Parse the event list and output the profiling report
EventSortingKey
sorted_by
)
{
void
ParseEvents
(
const
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
if
(
g_profiler_place
==
""
)
return
;
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
std
::
string
sorted_domain
;
std
::
string
sorted_domain
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
...
@@ -307,9 +364,9 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
...
@@ -307,9 +364,9 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
std
::
unordered_map
<
std
::
string
,
int
>
event_idx
;
std
::
unordered_map
<
std
::
string
,
int
>
event_idx
;
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
j
++
)
{
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
j
++
)
{
if
(
events
[
i
][
j
].
kind
()
==
"push"
)
{
if
(
events
[
i
][
j
].
type
()
==
EventType
::
kPushRange
)
{
pushed_events
.
push_back
(
events
[
i
][
j
]);
pushed_events
.
push_back
(
events
[
i
][
j
]);
}
else
if
(
events
[
i
][
j
].
kind
()
==
"pop"
)
{
}
else
if
(
events
[
i
][
j
].
type
()
==
EventType
::
kPopRange
)
{
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
()
&&
while
(
rit
!=
pushed_events
.
rend
()
&&
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
...
@@ -317,8 +374,8 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
...
@@ -317,8 +374,8 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
}
}
if
(
rit
!=
pushed_events
.
rend
())
{
if
(
rit
!=
pushed_events
.
rend
())
{
double
event_time
=
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
||
(
g_profiler_place
==
"CUDA"
||
g_profiler_place
==
"All"
)
g_state
==
ProfilerState
::
kAll
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
...
@@ -376,35 +433,22 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
...
@@ -376,35 +433,22 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
PrintProfiler
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
);
PrintProfiler
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
);
}
}
void
PrintProfiler
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
void
DisableProfiler
(
EventSortingKey
sorted_key
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
std
::
string
&
profile_path
)
{
const
size_t
data_width
)
{
PADDLE_ENFORCE
(
g_state
!=
ProfilerState
::
kDisabled
,
// Output header information
"Can't disable profiling, since it's not starting."
);
std
::
cout
<<
"
\n
------------------------->"
// Mark the profiling stop.
<<
" Profiling Report "
Mark
(
"_stop_profiler_"
,
nullptr
);
<<
"<-------------------------
\n\n
"
;
std
::
cout
<<
"Place: "
<<
g_profiler_place
<<
std
::
endl
;
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
ParseEvents
(
all_events
,
sorted_key
);
std
::
cout
<<
"Sorted by "
<<
sorted_domain
ResetProfiler
();
<<
" in descending order in the same thread
\n\n
"
;
DeviceTracer
*
tracer
=
GetDeviceTracer
();
// Output events table
if
(
g_state
==
ProfilerState
::
kAll
&&
tracer
&&
tracer
->
IsEnabled
())
{
std
::
cout
.
setf
(
std
::
ios
::
left
);
tracer
->
Disable
();
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
tracer
->
GenProfile
(
profile_path
);
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
}
std
::
cout
<<
std
::
endl
;
g_state
=
ProfilerState
::
kDisabled
;
}
}
}
// namespace platform
}
// namespace platform
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
add367c3
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <forward_list>
#include <forward_list>
#include <list>
#include <list>
#include <
mutex
>
#include <
string
>
#include <vector>
#include <vector>
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.pb.h"
#include "paddle/fluid/platform/profiler.pb.h"
...
@@ -23,16 +23,16 @@ limitations under the License. */
...
@@ -23,16 +23,16 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
enum
Event
Kind
{
kMark
,
kPushRange
,
kPopRange
};
enum
Event
Type
{
kMark
,
kPushRange
,
kPopRange
};
class
Event
{
class
Event
{
public:
public:
// The DeviceContext is used to get the cuda stream.
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
// If CPU profiling mode, can pass nullptr.
Event
(
Event
Kind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
Event
(
Event
Type
type
,
std
::
string
name
,
uint32_t
thread_id
,
const
DeviceContext
*
dev_ctx
);
const
DeviceContext
*
dev_ctx
);
std
::
string
kind
()
const
;
const
EventType
&
type
()
const
;
std
::
string
name
()
const
{
return
name_
;
}
std
::
string
name
()
const
{
return
name_
;
}
uint32_t
thread_id
()
const
{
return
thread_id_
;
}
uint32_t
thread_id
()
const
{
return
thread_id_
;
}
bool
has_cuda
()
const
{
return
has_cuda_
;
}
bool
has_cuda
()
const
{
return
has_cuda_
;
}
...
@@ -46,7 +46,7 @@ class Event {
...
@@ -46,7 +46,7 @@ class Event {
double
CudaElapsedMs
(
const
Event
&
e
)
const
;
double
CudaElapsedMs
(
const
Event
&
e
)
const
;
private:
private:
Event
Kind
kind
_
;
Event
Type
type
_
;
std
::
string
name_
;
std
::
string
name_
;
uint32_t
thread_id_
;
uint32_t
thread_id_
;
int64_t
cpu_ns_
;
int64_t
cpu_ns_
;
...
@@ -57,39 +57,6 @@ class Event {
...
@@ -57,39 +57,6 @@ class Event {
#endif
#endif
};
};
struct
EventList
{
constexpr
static
size_t
kMB
=
1024
*
1024
;
constexpr
static
size_t
kEventBlockSize
=
16
*
kMB
;
constexpr
static
size_t
kEventSize
=
sizeof
(
Event
);
constexpr
static
size_t
kEventAlign
=
alignof
(
Event
);
constexpr
static
size_t
kNumBlock
=
kEventBlockSize
/
((
kEventSize
+
kEventAlign
-
1
)
/
kEventAlign
*
kEventAlign
);
template
<
typename
...
Args
>
void
Record
(
Args
&&
...
args
)
{
if
(
event_blocks
.
empty
()
||
event_blocks
.
front
().
size
()
==
kNumBlock
)
{
event_blocks
.
emplace_front
();
event_blocks
.
front
().
reserve
(
kNumBlock
);
}
event_blocks
.
front
().
emplace_back
(
std
::
forward
<
Args
>
(
args
)...);
}
std
::
vector
<
Event
>
Reduce
()
{
std
::
vector
<
Event
>
result
;
for
(
auto
&
block
:
event_blocks
)
{
result
.
insert
(
result
.
begin
(),
std
::
make_move_iterator
(
block
.
begin
()),
std
::
make_move_iterator
(
block
.
end
()));
}
event_blocks
.
clear
();
return
result
;
}
void
Clear
()
{
event_blocks
.
clear
();
}
std
::
forward_list
<
std
::
vector
<
Event
>>
event_blocks
;
};
enum
ProfilerState
{
enum
ProfilerState
{
kDisabled
,
// disabled state
kDisabled
,
// disabled state
kCPU
,
// CPU profiling state
kCPU
,
// CPU profiling state
...
@@ -136,16 +103,6 @@ struct RecordThread {
...
@@ -136,16 +103,6 @@ struct RecordThread {
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
// The information of each event given in the profiling report
struct
EventItem
{
std
::
string
name
;
int
calls
;
double
total_time
;
double
min_time
;
double
max_time
;
double
ave_time
;
};
// Candidate keys to sort the profiling report
// Candidate keys to sort the profiling report
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
...
@@ -158,14 +115,5 @@ void ResetProfiler();
...
@@ -158,14 +115,5 @@ void ResetProfiler();
void
DisableProfiler
(
EventSortingKey
sorted_key
,
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
);
const
std
::
string
&
profile_path
);
// Parse the event list and output the profiling report
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
);
// Print results
void
PrintProfiler
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
);
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/profiler_test.cc
浏览文件 @
add367c3
...
@@ -13,22 +13,23 @@ See the License for the specific language governing permissions and
...
@@ -13,22 +13,23 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include <string>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include
"cuda_runtime.h"
#include
<cuda_runtime.h>
#endif
#endif
#include "gtest/gtest.h"
#include "gtest/gtest.h"
TEST
(
Event
,
CpuElapsedTime
)
{
TEST
(
Event
,
CpuElapsedTime
)
{
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
Event
start_event
(
Event
Kind
::
kPushRange
,
"test"
,
0
,
nullptr
);
Event
start_event
(
Event
Type
::
kPushRange
,
"test"
,
0
,
nullptr
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
false
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
false
);
int
counter
=
0
;
int
counter
=
0
;
while
(
counter
!=
1000
)
{
while
(
counter
!=
1000
)
{
counter
++
;
counter
++
;
}
}
Event
stop_event
(
Event
Kind
::
kPopRange
,
"test"
,
0
,
nullptr
);
Event
stop_event
(
Event
Type
::
kPopRange
,
"test"
,
0
,
nullptr
);
EXPECT_GT
(
start_event
.
CpuElapsedMs
(
stop_event
),
0
);
EXPECT_GT
(
start_event
.
CpuElapsedMs
(
stop_event
),
0
);
}
}
...
@@ -38,16 +39,16 @@ TEST(Event, CudaElapsedTime) {
...
@@ -38,16 +39,16 @@ TEST(Event, CudaElapsedTime) {
using
paddle
::
platform
::
CUDADeviceContext
;
using
paddle
::
platform
::
CUDADeviceContext
;
using
paddle
::
platform
::
CUDAPlace
;
using
paddle
::
platform
::
CUDAPlace
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
DeviceContext
*
dev_ctx
=
new
CUDADeviceContext
(
CUDAPlace
(
0
));
DeviceContext
*
dev_ctx
=
new
CUDADeviceContext
(
CUDAPlace
(
0
));
Event
start_event
(
Event
Kind
::
kPushRange
,
"test"
,
0
,
dev_ctx
);
Event
start_event
(
Event
Type
::
kPushRange
,
"test"
,
0
,
dev_ctx
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
true
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
true
);
int
counter
=
0
;
int
counter
=
0
;
while
(
counter
!=
1000
)
{
while
(
counter
!=
1000
)
{
counter
++
;
counter
++
;
}
}
Event
stop_event
(
Event
Kind
::
kPopRange
,
"test"
,
0
,
dev_ctx
);
Event
stop_event
(
Event
Type
::
kPopRange
,
"test"
,
0
,
dev_ctx
);
EXPECT_GT
(
start_event
.
CudaElapsedMs
(
stop_event
),
0
);
EXPECT_GT
(
start_event
.
CudaElapsedMs
(
stop_event
),
0
);
}
}
#endif
#endif
...
@@ -55,7 +56,7 @@ TEST(Event, CudaElapsedTime) {
...
@@ -55,7 +56,7 @@ TEST(Event, CudaElapsedTime) {
TEST
(
RecordEvent
,
RecordEvent
)
{
TEST
(
RecordEvent
,
RecordEvent
)
{
using
paddle
::
platform
::
DeviceContext
;
using
paddle
::
platform
::
DeviceContext
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
using
paddle
::
platform
::
RecordEvent
;
using
paddle
::
platform
::
RecordEvent
;
using
paddle
::
platform
::
ProfilerState
;
using
paddle
::
platform
::
ProfilerState
;
using
paddle
::
platform
::
EventSortingKey
;
using
paddle
::
platform
::
EventSortingKey
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录