Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3b08c9ab
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3b08c9ab
编写于
2月 22, 2019
作者:
C
chengduo
提交者:
GitHub
2月 22, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
enhance profiler (#15842)
test=develop
上级
7d96c74a
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
59 addition
and
11 deletion
+59
-11
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+2
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+47
-10
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+10
-1
未找到文件。
paddle/fluid/platform/device_tracer.cc
浏览文件 @
3b08c9ab
...
@@ -601,6 +601,8 @@ void initCuptiCbidStr() {
...
@@ -601,6 +601,8 @@ void initCuptiCbidStr() {
REGISTER_RUNTIME_CBID_STR
(
cudaStreamSynchronize_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamSynchronize_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamWaitEvent_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaStreamWaitEvent_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaUnbindTexture_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaUnbindTexture_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaSetupArgument_v3020
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunch_v3020
);
#if CUDA_VERSION >= 9000
#if CUDA_VERSION >= 9000
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernel_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernel_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernelMultiDevice_v9000
);
REGISTER_RUNTIME_CBID_STR
(
cudaLaunchCooperativeKernelMultiDevice_v9000
);
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
3b08c9ab
...
@@ -254,9 +254,11 @@ struct EventItem {
...
@@ -254,9 +254,11 @@ struct EventItem {
std
::
string
name
;
std
::
string
name
;
int
calls
;
int
calls
;
double
total_time
;
double
total_time
;
double
min_time
;
double
max_time
;
double
max_time
;
double
ave_time
;
double
ave_time
;
double
min_time
;
double
cpu_time
;
double
gpu_time
;
float
ratio
;
float
ratio
;
};
};
...
@@ -290,8 +292,12 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
...
@@ -290,8 +292,12 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
// Output events table
// Output events table
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
.
setf
(
std
::
ios
::
left
);
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
;
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
if
(
g_state
==
ProfilerState
::
kAll
)
{
std
::
cout
<<
std
::
setw
(
data_width
*
2
)
<<
"CPU Time (Ratio)"
<<
std
::
setw
(
data_width
*
2
)
<<
"GPU Time (Ratio)"
;
}
std
::
cout
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
...
@@ -299,8 +305,18 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
...
@@ -299,8 +305,18 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
std
::
cout
<<
std
::
setw
(
name_width
)
<<
event_item
.
name
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
calls
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
;
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
if
(
g_state
==
ProfilerState
::
kAll
)
{
std
::
cout
<<
std
::
setw
(
data_width
*
2
)
<<
string
::
Sprintf
(
"%f (%f)"
,
event_item
.
cpu_time
,
(
event_item
.
cpu_time
/
event_item
.
total_time
))
<<
std
::
setw
(
data_width
*
2
)
<<
string
::
Sprintf
(
"%f (%f)"
,
event_item
.
gpu_time
,
(
event_item
.
gpu_time
/
event_item
.
total_time
));
}
std
::
cout
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ratio
<<
std
::
endl
;
<<
std
::
setw
(
data_width
)
<<
event_item
.
ratio
<<
std
::
endl
;
...
@@ -349,6 +365,18 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -349,6 +365,18 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
return
a
.
ave_time
>
b
.
ave_time
;
return
a
.
ave_time
>
b
.
ave_time
;
};
};
break
;
break
;
case
EventSortingKey
::
kGPUTime
:
sorted_domain
=
"average time"
;
sorted_func
=
[](
const
EventItem
&
a
,
const
EventItem
&
b
)
{
return
a
.
gpu_time
>
b
.
gpu_time
;
};
break
;
case
EventSortingKey
::
kCPUTime
:
sorted_domain
=
"average time"
;
sorted_func
=
[](
const
EventItem
&
a
,
const
EventItem
&
b
)
{
return
a
.
cpu_time
>
b
.
cpu_time
;
};
break
;
default:
default:
sorted_domain
=
"event first end time"
;
sorted_domain
=
"event first end time"
;
}
}
...
@@ -387,10 +415,17 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -387,10 +415,17 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
}
if
(
rit
!=
pushed_events
.
rend
())
{
if
(
rit
!=
pushed_events
.
rend
())
{
double
event_time
=
(
g_state
==
ProfilerState
::
kCUDA
||
double
event_time
=
0
;
g_state
==
ProfilerState
::
kAll
)
double
gpu_time
=
rit
->
CudaElapsedMs
((
*
analyze_events
)[
i
][
j
]);
?
rit
->
CudaElapsedMs
((
*
analyze_events
)[
i
][
j
])
double
cpu_time
=
rit
->
CpuElapsedMs
((
*
analyze_events
)[
i
][
j
]);
:
rit
->
CpuElapsedMs
((
*
analyze_events
)[
i
][
j
]);
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
event_time
=
gpu_time
;
}
else
if
(
g_state
==
ProfilerState
::
kCPU
)
{
event_time
=
cpu_time
;
}
else
{
event_time
=
gpu_time
+
cpu_time
;
}
total
+=
event_time
;
total
+=
event_time
;
std
::
string
event_name
;
std
::
string
event_name
;
...
@@ -407,7 +442,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -407,7 +442,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
event_idx
[
event_name
]
=
event_items
.
size
();
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
event_time
,
0.
};
gpu_time
,
cpu_time
,
0.
};
event_items
.
push_back
(
event_item
);
event_items
.
push_back
(
event_item
);
}
else
{
}
else
{
int
index
=
event_idx
[
event_name
];
int
index
=
event_idx
[
event_name
];
...
@@ -420,6 +455,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -420,6 +455,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
// max time
// max time
event_items
[
index
].
max_time
=
event_items
[
index
].
max_time
=
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
std
::
max
(
event_time
,
event_items
[
index
].
max_time
);
event_items
[
index
].
gpu_time
+=
gpu_time
;
event_items
[
index
].
cpu_time
+=
cpu_time
;
}
}
// remove the push marker from the list
// remove the push marker from the list
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
3b08c9ab
...
@@ -117,7 +117,16 @@ struct RecordBlock {
...
@@ -117,7 +117,16 @@ struct RecordBlock {
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
std
::
vector
<
std
::
vector
<
Event
>>
GetAllEvents
();
// Candidate keys to sort the profiling report
// Candidate keys to sort the profiling report
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
};
enum
EventSortingKey
{
kDefault
,
kCalls
,
kTotal
,
kMin
,
kMax
,
kAve
,
kCPUTime
,
kGPUTime
};
// Enable the profiling function.
// Enable the profiling function.
void
EnableProfiler
(
ProfilerState
state
);
void
EnableProfiler
(
ProfilerState
state
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录