Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
deb04809
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
deb04809
编写于
12月 04, 2018
作者:
Z
ZongwuYang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test=develop
Fix the bug that profiler cannot trace the nccl allreduce operator
上级
6224e61f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
14 addition
and
11 deletion
+14
-11
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+10
-9
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+4
-2
未找到文件。
paddle/fluid/platform/device_tracer.cc
浏览文件 @
deb04809
...
...
@@ -143,7 +143,7 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
case
CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL
:
{
auto
*
kernel
=
reinterpret_cast
<
const
CUpti_ActivityKernel3
*>
(
record
);
tracer
->
AddKernelRecords
(
kernel
->
start
,
kernel
->
end
,
tracer
->
AddKernelRecords
(
kernel
->
name
,
kernel
->
start
,
kernel
->
end
,
kernel
->
deviceId
,
kernel
->
streamId
,
kernel
->
correlationId
);
break
;
...
...
@@ -224,8 +224,9 @@ class DeviceTracerImpl : public DeviceTracer {
stream_id
,
correlation_id
,
bytes
});
}
void
AddKernelRecords
(
uint64_t
start
,
uint64_t
end
,
int64_t
device_id
,
int64_t
stream_id
,
uint32_t
correlation_id
)
{
void
AddKernelRecords
(
std
::
string
name
,
uint64_t
start
,
uint64_t
end
,
int64_t
device_id
,
int64_t
stream_id
,
uint32_t
correlation_id
)
{
// 0 means timestamp information could not be collected for the kernel.
if
(
start
==
0
||
end
==
0
)
{
VLOG
(
30
)
<<
correlation_id
<<
" cannot be traced"
;
...
...
@@ -233,7 +234,7 @@ class DeviceTracerImpl : public DeviceTracer {
}
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
kernel_records_
.
push_back
(
KernelRecord
{
start
,
end
,
device_id
,
stream_id
,
correlation_id
});
KernelRecord
{
name
,
start
,
end
,
device_id
,
stream_id
,
correlation_id
});
}
bool
IsEnabled
()
{
...
...
@@ -276,13 +277,13 @@ class DeviceTracerImpl : public DeviceTracer {
profile_pb
.
set_start_ns
(
start_ns_
);
profile_pb
.
set_end_ns
(
end_ns_
);
for
(
const
KernelRecord
&
r
:
kernel_records_
)
{
if
(
correlations_
.
find
(
r
.
correlation_id
)
==
correlations_
.
end
())
{
fprintf
(
stderr
,
"cannot relate a kernel activity
\n
"
);
continue
;
}
auto
*
event
=
profile_pb
.
add_events
();
event
->
set_type
(
proto
::
Event
::
GPUKernel
);
event
->
set_name
(
correlations_
.
at
(
r
.
correlation_id
));
if
(
correlations_
.
find
(
r
.
correlation_id
)
!=
correlations_
.
end
())
{
event
->
set_name
(
correlations_
.
at
(
r
.
correlation_id
));
}
else
{
event
->
set_name
(
r
.
name
);
}
event
->
set_start_ns
(
r
.
start_ns
);
event
->
set_end_ns
(
r
.
end_ns
);
event
->
set_sub_device_id
(
r
.
stream_id
);
...
...
paddle/fluid/platform/device_tracer.h
浏览文件 @
deb04809
...
...
@@ -39,6 +39,7 @@ inline uint64_t PosixInNsec() {
class
DeviceTracer
{
public:
struct
KernelRecord
{
std
::
string
name
;
uint64_t
start_ns
;
uint64_t
end_ns
;
int64_t
device_id
;
...
...
@@ -84,8 +85,9 @@ class DeviceTracer {
// Add a cuda kernel stats. `correlation_id` will be mapped to annotation
// added before for human readability.
virtual
void
AddKernelRecords
(
uint64_t
start
,
uint64_t
end
,
int64_t
device_id
,
int64_t
stream_id
,
uint32_t
correlation_id
)
=
0
;
virtual
void
AddKernelRecords
(
std
::
string
name
,
uint64_t
start
,
uint64_t
end
,
int64_t
device_id
,
int64_t
stream_id
,
uint32_t
correlation_id
)
=
0
;
// Generate a proto after done (Disabled).
virtual
proto
::
Profile
GenProfile
(
const
std
::
string
&
profile_path
)
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录