Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
add367c3
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
add367c3
编写于
4月 10, 2018
作者:
Q
qingqing01
提交者:
Yi Wang
4月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Code cleanup in the profiler code. (#9782)
上级
326b434a
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
133 addition
and
140 deletion
+133
-140
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+119
-75
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+5
-57
paddle/fluid/platform/profiler_test.cc
paddle/fluid/platform/profiler_test.cc
+9
-8
未找到文件。
paddle/fluid/platform/profiler.cc
浏览文件 @
add367c3
...
...
@@ -15,8 +15,11 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <iomanip>
#include <map>
#include <mutex> // NOLINT
#include <string>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif // PADDLE_WITH_CUDA
...
...
@@ -28,10 +31,10 @@ limitations under the License. */
namespace
paddle
{
namespace
platform
{
struct
EventList
;
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
// To record which timer the profiler used, CUDA or CPU.
static
std
::
string
g_profiler_place
=
""
;
// The thread local event list only can be accessed by the specific thread
// The thread index of each thread
static
thread_local
int32_t
g_thread_id
;
...
...
@@ -45,6 +48,39 @@ static std::list<std::shared_ptr<EventList>> g_all_event_lists;
// The thread local event list only can be accessed by the specific thread
static
thread_local
std
::
shared_ptr
<
EventList
>
g_event_list
;
struct
EventList
{
constexpr
static
size_t
kMB
=
1024
*
1024
;
constexpr
static
size_t
kEventBlockSize
=
16
*
kMB
;
constexpr
static
size_t
kEventSize
=
sizeof
(
Event
);
constexpr
static
size_t
kEventAlign
=
alignof
(
Event
);
constexpr
static
size_t
kNumBlock
=
kEventBlockSize
/
((
kEventSize
+
kEventAlign
-
1
)
/
kEventAlign
*
kEventAlign
);
template
<
typename
...
Args
>
void
Record
(
Args
&&
...
args
)
{
if
(
event_blocks
.
empty
()
||
event_blocks
.
front
().
size
()
==
kNumBlock
)
{
event_blocks
.
emplace_front
();
event_blocks
.
front
().
reserve
(
kNumBlock
);
}
event_blocks
.
front
().
emplace_back
(
std
::
forward
<
Args
>
(
args
)...);
}
std
::
vector
<
Event
>
Reduce
()
{
std
::
vector
<
Event
>
result
;
for
(
auto
&
block
:
event_blocks
)
{
result
.
insert
(
result
.
begin
(),
std
::
make_move_iterator
(
block
.
begin
()),
std
::
make_move_iterator
(
block
.
end
()));
}
event_blocks
.
clear
();
return
result
;
}
void
Clear
()
{
event_blocks
.
clear
();
}
std
::
forward_list
<
std
::
vector
<
Event
>>
event_blocks
;
};
inline
uint64_t
GetTimeInNsec
()
{
using
clock
=
std
::
conditional
<
std
::
chrono
::
high_resolution_clock
::
is_steady
,
std
::
chrono
::
high_resolution_clock
,
...
...
@@ -60,9 +96,9 @@ inline uint64_t PosixInNsec() {
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
}
Event
::
Event
(
Event
Kind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
Event
::
Event
(
Event
Type
type
,
std
::
string
name
,
uint32_t
thread_id
,
const
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
:
type_
(
type
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
has_cuda_
=
dev_ctx
?
platform
::
is_gpu_place
(
dev_ctx
->
GetPlace
())
:
false
;
if
(
has_cuda_
)
{
...
...
@@ -76,17 +112,7 @@ Event::Event(EventKind kind, std::string name, uint32_t thread_id,
cpu_ns_
=
GetTimeInNsec
();
}
std
::
string
Event
::
kind
()
const
{
switch
(
kind_
)
{
case
EventKind
::
kMark
:
return
"mark"
;
case
EventKind
::
kPushRange
:
return
"push"
;
case
EventKind
::
kPopRange
:
return
"pop"
;
}
PADDLE_THROW
(
"Unknown EventKind."
);
}
const
EventType
&
Event
::
type
()
const
{
return
type_
;
}
double
Event
::
CpuElapsedMs
(
const
Event
&
e
)
const
{
return
(
e
.
cpu_ns_
-
cpu_ns_
)
/
(
1000000.0
);
...
...
@@ -129,15 +155,15 @@ inline EventList& GetEventList() {
}
void
Mark
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PushEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
Event
Kind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
GetEventList
().
Record
(
Event
Type
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
...
...
@@ -197,12 +223,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling "
,
"EnableProfiler."
);
g_state
=
state
;
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
g_profiler_place
=
"CUDA"
;
}
else
if
(
g_state
==
ProfilerState
::
kCPU
)
{
g_profiler_place
=
"CPU"
;
}
else
{
g_profiler_place
=
"All"
;
if
(
g_state
==
ProfilerState
::
kAll
)
{
GetDeviceTracer
()
->
Enable
();
}
#ifdef PADDLE_WITH_CUDA
...
...
@@ -240,27 +261,63 @@ std::vector<std::vector<Event>> GetAllEvents() {
return
result
;
}
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
)
{
PADDLE_ENFORCE
(
g_state
!=
ProfilerState
::
kDisabled
,
"Can't disable profiling, since it's not starting."
);
// Mark the profiling stop.
Mark
(
"_stop_profiler_"
,
nullptr
);
g_state
=
ProfilerState
::
kDisabled
;
// The information of each event given in the profiling report
struct
EventItem
{
std
::
string
name
;
int
calls
;
double
total_time
;
double
min_time
;
double
max_time
;
double
ave_time
;
};
// Print results
void
PrintProfiler
(
const
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
const
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
)
{
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
string
place
;
if
(
g_state
==
ProfilerState
::
kCPU
)
{
place
=
"CPU"
;
}
else
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
place
=
"CUDA"
;
}
else
if
(
g_state
==
ProfilerState
::
kAll
)
{
place
=
"All"
;
}
else
{
PADDLE_THROW
(
"Invalid profiler state"
);
}
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
ParseEvents
(
all_events
,
sorted_key
);
ResetProfiler
();
DeviceTracer
*
tracer
=
GetDeviceTracer
();
if
(
g_profiler_place
==
"All"
&&
tracer
&&
tracer
->
IsEnabled
())
{
tracer
->
Disable
();
tracer
->
GenProfile
(
profile_path
);
std
::
cout
<<
"Place: "
<<
place
<<
std
::
endl
;
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
cout
<<
"Sorted by "
<<
sorted_domain
<<
" in descending order in the same thread
\n\n
"
;
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
EventSortingKey
sorted_by
)
{
if
(
g_profiler_place
==
""
)
return
;
// Parse the event list and output the profiling report
void
ParseEvents
(
const
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
std
::
string
sorted_domain
;
std
::
function
<
bool
(
const
EventItem
&
,
const
EventItem
&
)
>
sorted_func
;
...
...
@@ -307,9 +364,9 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
std
::
unordered_map
<
std
::
string
,
int
>
event_idx
;
for
(
size_t
j
=
0
;
j
<
events
[
i
].
size
();
j
++
)
{
if
(
events
[
i
][
j
].
kind
()
==
"push"
)
{
if
(
events
[
i
][
j
].
type
()
==
EventType
::
kPushRange
)
{
pushed_events
.
push_back
(
events
[
i
][
j
]);
}
else
if
(
events
[
i
][
j
].
kind
()
==
"pop"
)
{
}
else
if
(
events
[
i
][
j
].
type
()
==
EventType
::
kPopRange
)
{
std
::
list
<
Event
>::
reverse_iterator
rit
=
pushed_events
.
rbegin
();
while
(
rit
!=
pushed_events
.
rend
()
&&
rit
->
name
()
!=
events
[
i
][
j
].
name
())
{
...
...
@@ -317,8 +374,8 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
}
if
(
rit
!=
pushed_events
.
rend
())
{
double
event_time
=
(
g_profiler_place
==
"CUDA"
||
g_profiler_place
==
"All"
)
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
||
g_state
==
ProfilerState
::
kAll
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
...
...
@@ -376,35 +433,22 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
PrintProfiler
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
);
}
void
PrintProfiler
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
)
{
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
<<
"<-------------------------
\n\n
"
;
std
::
cout
<<
"Place: "
<<
g_profiler_place
<<
std
::
endl
;
std
::
cout
<<
"Time unit: ms"
<<
std
::
endl
;
std
::
cout
<<
"Sorted by "
<<
sorted_domain
<<
" in descending order in the same thread
\n\n
"
;
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
}
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
)
{
PADDLE_ENFORCE
(
g_state
!=
ProfilerState
::
kDisabled
,
"Can't disable profiling, since it's not starting."
);
// Mark the profiling stop.
Mark
(
"_stop_profiler_"
,
nullptr
);
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
ParseEvents
(
all_events
,
sorted_key
);
ResetProfiler
();
DeviceTracer
*
tracer
=
GetDeviceTracer
();
if
(
g_state
==
ProfilerState
::
kAll
&&
tracer
&&
tracer
->
IsEnabled
())
{
tracer
->
Disable
();
tracer
->
GenProfile
(
profile_path
);
}
std
::
cout
<<
std
::
endl
;
g_state
=
ProfilerState
::
kDisabled
;
}
}
// namespace platform
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
add367c3
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include <forward_list>
#include <list>
#include <
mutex
>
#include <
string
>
#include <vector>
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.pb.h"
...
...
@@ -23,16 +23,16 @@ limitations under the License. */
namespace
paddle
{
namespace
platform
{
enum
Event
Kind
{
kMark
,
kPushRange
,
kPopRange
};
enum
Event
Type
{
kMark
,
kPushRange
,
kPopRange
};
class
Event
{
public:
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
Event
(
Event
Kind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
Event
(
Event
Type
type
,
std
::
string
name
,
uint32_t
thread_id
,
const
DeviceContext
*
dev_ctx
);
std
::
string
kind
()
const
;
const
EventType
&
type
()
const
;
std
::
string
name
()
const
{
return
name_
;
}
uint32_t
thread_id
()
const
{
return
thread_id_
;
}
bool
has_cuda
()
const
{
return
has_cuda_
;
}
...
...
@@ -46,7 +46,7 @@ class Event {
double
CudaElapsedMs
(
const
Event
&
e
)
const
;
private:
Event
Kind
kind
_
;
Event
Type
type
_
;
std
::
string
name_
;
uint32_t
thread_id_
;
int64_t
cpu_ns_
;
...
...
@@ -57,39 +57,6 @@ class Event {
#endif
};
struct
EventList
{
constexpr
static
size_t
kMB
=
1024
*
1024
;
constexpr
static
size_t
kEventBlockSize
=
16
*
kMB
;
constexpr
static
size_t
kEventSize
=
sizeof
(
Event
);
constexpr
static
size_t
kEventAlign
=
alignof
(
Event
);
constexpr
static
size_t
kNumBlock
=
kEventBlockSize
/
((
kEventSize
+
kEventAlign
-
1
)
/
kEventAlign
*
kEventAlign
);
template
<
typename
...
Args
>
void
Record
(
Args
&&
...
args
)
{
if
(
event_blocks
.
empty
()
||
event_blocks
.
front
().
size
()
==
kNumBlock
)
{
event_blocks
.
emplace_front
();
event_blocks
.
front
().
reserve
(
kNumBlock
);
}
event_blocks
.
front
().
emplace_back
(
std
::
forward
<
Args
>
(
args
)...);
}
std
::
vector
<
Event
>
Reduce
()
{
std
::
vector
<
Event
>
result
;
for
(
auto
&
block
:
event_blocks
)
{
result
.
insert
(
result
.
begin
(),
std
::
make_move_iterator
(
block
.
begin
()),
std
::
make_move_iterator
(
block
.
end
()));
}
event_blocks
.
clear
();
return
result
;
}
void
Clear
()
{
event_blocks
.
clear
();
}
std
::
forward_list
<
std
::
vector
<
Event
>>
event_blocks
;
};
enum
ProfilerState
{
kDisabled
,
// disabled state
kCPU
,
// CPU profiling state
...
...
@@ -136,16 +103,6 @@ struct RecordThread {
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
// The information of each event given in the profiling report
struct
EventItem
{
std
::
string
name
;
int
calls
;
double
total_time
;
double
min_time
;
double
max_time
;
double
ave_time
;
};
// Candidate keys to sort the profiling report
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
...
...
@@ -158,14 +115,5 @@ void ResetProfiler();
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
);
// Parse the event list and output the profiling report
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
,
EventSortingKey
sorted_by
=
EventSortingKey
::
kDefault
);
// Print results
void
PrintProfiler
(
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
);
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler_test.cc
浏览文件 @
add367c3
...
...
@@ -13,22 +13,23 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include <string>
#ifdef PADDLE_WITH_CUDA
#include
"cuda_runtime.h"
#include
<cuda_runtime.h>
#endif
#include "gtest/gtest.h"
TEST
(
Event
,
CpuElapsedTime
)
{
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
Event
start_event
(
Event
Kind
::
kPushRange
,
"test"
,
0
,
nullptr
);
Event
start_event
(
Event
Type
::
kPushRange
,
"test"
,
0
,
nullptr
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
false
);
int
counter
=
0
;
while
(
counter
!=
1000
)
{
counter
++
;
}
Event
stop_event
(
Event
Kind
::
kPopRange
,
"test"
,
0
,
nullptr
);
Event
stop_event
(
Event
Type
::
kPopRange
,
"test"
,
0
,
nullptr
);
EXPECT_GT
(
start_event
.
CpuElapsedMs
(
stop_event
),
0
);
}
...
...
@@ -38,16 +39,16 @@ TEST(Event, CudaElapsedTime) {
using
paddle
::
platform
::
CUDADeviceContext
;
using
paddle
::
platform
::
CUDAPlace
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
DeviceContext
*
dev_ctx
=
new
CUDADeviceContext
(
CUDAPlace
(
0
));
Event
start_event
(
Event
Kind
::
kPushRange
,
"test"
,
0
,
dev_ctx
);
Event
start_event
(
Event
Type
::
kPushRange
,
"test"
,
0
,
dev_ctx
);
EXPECT_TRUE
(
start_event
.
has_cuda
()
==
true
);
int
counter
=
0
;
while
(
counter
!=
1000
)
{
counter
++
;
}
Event
stop_event
(
Event
Kind
::
kPopRange
,
"test"
,
0
,
dev_ctx
);
Event
stop_event
(
Event
Type
::
kPopRange
,
"test"
,
0
,
dev_ctx
);
EXPECT_GT
(
start_event
.
CudaElapsedMs
(
stop_event
),
0
);
}
#endif
...
...
@@ -55,7 +56,7 @@ TEST(Event, CudaElapsedTime) {
TEST
(
RecordEvent
,
RecordEvent
)
{
using
paddle
::
platform
::
DeviceContext
;
using
paddle
::
platform
::
Event
;
using
paddle
::
platform
::
Event
Kind
;
using
paddle
::
platform
::
Event
Type
;
using
paddle
::
platform
::
RecordEvent
;
using
paddle
::
platform
::
ProfilerState
;
using
paddle
::
platform
::
EventSortingKey
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录