Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
3b08c9ab
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3b08c9ab
编写于
2月 22, 2019
作者:
C
chengduo
提交者:
GitHub
2月 22, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
enhance profiler (#15842)
test=develop
上级
7d96c74a
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
59 additions
and
11 deletions
+59
-11
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+2
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+47
-10
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+10
-1
未找到文件。
paddle/fluid/platform/device_tracer.cc
浏览文件 @
3b08c9ab
...
@@ -601,6 +601,8 @@ void initCuptiCbidStr() {
...
@@ -601,6 +601,8 @@ void initCuptiCbidStr() {
REGISTER_RUNTIME_CBID_STR
(
cudaStreamSynchronize_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamSynchronize_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamWaitEvent_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamWaitEvent_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaUnbindTexture_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaUnbindTexture_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaSetupArgument_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunch_v3020
);
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernel_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernel_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernelMultiDevice_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernelMultiDevice_v9000
);
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
3b08c9ab
...
@@ -254,9 +254,11 @@ struct EventItem {
...
@@ -254,9 +254,11 @@ struct EventItem {
std
::
string
name
;
std
::
string
name
;
int
calls
;
int
calls
;
double
total_time
;
double
total_time
;
double
min_time
;
double
max_time
;
double
max_time
;
double
ave_time
;
double
ave_time
;
double
min_time
;
double
cpu_time
;
double
gpu_time
;
float
ratio
;
float
ratio
;
};
};
...
@@ -290,8 +292,12 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
...
@@ -290,8 +292,12 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
// Output events table
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
;
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
if
(
g_state
==
ProfilerState
::
kAll
)
{
std
::
cout
<<
std
::
setw
(
data_width
*
2
)
<<
"CPU Time (Ratio)"
<<
std
::
setw
(
data_width
*
2
)
<<
"GPU Time (Ratio)"
;
}
std
::
cout
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
...
@@ -299,8 +305,18 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
...
@@ -299,8 +305,18 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
;
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
if
(
g_state
==
ProfilerState
::
kAll
)
{
std
::
cout
<<
std
::
setw
(
data_width
*
2
)
<<
string
::
Sprintf
(
"%f (%f)"
,
event_item
.
cpu_time
,
(
event_item
.
cpu_time
/
event_item
.
total_time
))
<<
std
::
setw
(
data_width
*
2
)
<<
string
::
Sprintf
(
"%f (%f)"
,
event_item
.
gpu_time
,
(
event_item
.
gpu_time
/
event_item
.
total_time
));
}
std
::
cout
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ratio
<<
std
::
endl
;
<<
std
::
setw
(
data_width
)
<<
event_item
.
ratio
<<
std
::
endl
;
...
@@ -349,6 +365,18 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -349,6 +365,18 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
return
a
.
ave_time
>
b
.
ave_time
;
return
a
.
ave_time
>
b
.
ave_time
;
};
};
break
;
break
;
case
EventSortingKey
::
kGPUTime
:
sorted_domain
=
"average time"
;
sorted_func
=
[](
const
EventItem
&
a
,
const
EventItem
&
b
)
{
return
a
.
gpu_time
>
b
.
gpu_time
;
};
break
;
case
EventSortingKey
::
kCPUTime
:
sorted_domain
=
"average time"
;
sorted_func
=
[](
const
EventItem
&
a
,
const
EventItem
&
b
)
{
return
a
.
cpu_time
>
b
.
cpu_time
;
};
break
;
default:
default:
sorted_domain
=
"event first end time"
;
sorted_domain
=
"event first end time"
;
}
}
...
@@ -387,10 +415,17 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -387,10 +415,17 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
}
if
(
rit
!=
pushed_events
.
rend
())
{
if
(
rit
!=
pushed_events
.
rend
())
{
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
||
double
event_time
=
0
;
g_state
==
ProfilerState
::
kAll
)
double
gpu_time
=
rit
->
CudaElapsedMs
((
*
analyze_events
)[
i
][
j
]);
?
rit
->
CudaElapsedMs
((
*
analyze_events
)[
i
][
j
])
double
cpu_time
=
rit
->
CpuElapsedMs
((
*
analyze_events
)[
i
][
j
]);
:
rit
->
CpuElapsedMs
((
*
analyze_events
)[
i
][
j
]);
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
event_time
=
gpu_time
;
}
else
if
(
g_state
==
ProfilerState
::
kCPU
)
{
event_time
=
cpu_time
;
}
else
{
event_time
=
gpu_time
+
cpu_time
;
}
total
+=
event_time
;
total
+=
event_time
;
std
::
string
event_name
;
std
::
string
event_name
;
...
@@ -407,7 +442,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -407,7 +442,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
event_idx
[
event_name
]
=
event_items
.
size
();
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
0.
};
gpu_time
,
cpu_time
,
0.
};
event_items
.
push_back
(
event_item
);
event_items
.
push_back
(
event_item
);
}
else
{
}
else
{
int
index
=
event_idx
[
event_name
];
int
index
=
event_idx
[
event_name
];
...
@@ -420,6 +455,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -420,6 +455,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
// max time
// max time
event_items
[
index
].
max_time
=
event_items
[
index
].
max_time
=
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
event_items
[
index
].
gpu_time
+=
gpu_time
;
event_items
[
index
].
cpu_time
+=
cpu_time
;
}
}
// remove the push marker from the list
// remove the push marker from the list
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
3b08c9ab
...
@@ -117,7 +117,16 @@ struct RecordBlock {
...
@@ -117,7 +117,16 @@ struct RecordBlock {
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
// Candidate keys to sort the profiling report
// Candidate keys to sort the profiling report
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
,
kCPUTime
,
kGPUTime
};
// Enable the profiling function.
// Enable the profiling function.
void
EnableProfiler
(
ProfilerState
state
);
void
EnableProfiler
(
ProfilerState
state
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录