Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
55b2d3d0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
55b2d3d0
编写于
3月 01, 2018
作者:
X
Xin Pan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add CPU time to the timeline.
上级
9fbe2e36
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
64 addition
and
14 deletion
+64
-14
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+22
-2
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+11
-1
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+17
-6
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+1
-0
paddle/fluid/platform/profiler.proto
paddle/fluid/platform/profiler.proto
+2
-1
tools/timeline.py
tools/timeline.py
+11
-4
未找到文件。
paddle/fluid/platform/device_tracer.cc
浏览文件 @
55b2d3d0
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <map>
#include <mutex>
#include <numeric>
#include <thread>
#include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/string/printf.h"
...
...
@@ -140,6 +141,13 @@ class DeviceTracerImpl : public DeviceTracer {
correlations_
[
id
]
=
anno
;
}
void
AddCPURecords
(
const
char
*
anno
,
uint64_t
start_ns
,
uint64_t
end_ns
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
cpu_records_
.
push_back
(
CPURecord
{
anno
,
start_ns
,
end_ns
,
std
::
hash
<
std
::
thread
::
id
>
{}(
std
::
this_thread
::
get_id
())});
}
void
AddKernelRecords
(
uint64_t
start
,
uint64_t
end
,
uint32_t
device_id
,
uint32_t
stream_id
,
uint32_t
correlation_id
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
...
...
@@ -185,7 +193,6 @@ class DeviceTracerImpl : public DeviceTracer {
proto
::
Profile
profile_pb
;
profile_pb
.
set_start_ns
(
start_ns_
);
profile_pb
.
set_end_ns
(
end_ns_
);
std
::
map
<
std
::
string
,
std
::
vector
<
uint64_t
>>
event_times
;
for
(
const
KernelRecord
&
r
:
kernel_records_
)
{
if
(
correlations_
.
find
(
r
.
correlation_id
)
==
correlations_
.
end
())
{
fprintf
(
stderr
,
"cannot relate a kernel activity
\n
"
);
...
...
@@ -197,7 +204,15 @@ class DeviceTracerImpl : public DeviceTracer {
event
->
set_end_ns
(
r
.
end_ns
);
event
->
set_stream_id
(
r
.
stream_id
);
event
->
set_device_id
(
r
.
device_id
);
event_times
[
event
->
name
()].
push_back
(
r
.
end_ns
-
r
.
start_ns
);
}
for
(
const
CPURecord
&
r
:
cpu_records_
)
{
auto
*
event
=
profile_pb
.
add_events
();
event
->
set_name
(
r
.
name
);
event
->
set_start_ns
(
r
.
start_ns
);
event
->
set_end_ns
(
r
.
end_ns
);
event
->
set_stream_id
(
r
.
thread_id
);
event
->
set_device_id
(
-
1
);
}
std
::
string
profile_str
;
google
::
protobuf
::
TextFormat
::
PrintToString
(
profile_pb
,
&
profile_str
);
...
...
@@ -242,6 +257,7 @@ class DeviceTracerImpl : public DeviceTracer {
uint64_t
start_ns_
;
uint64_t
end_ns_
;
std
::
vector
<
KernelRecord
>
kernel_records_
;
std
::
vector
<
CPURecord
>
cpu_records_
;
std
::
unordered_map
<
uint32_t
,
std
::
string
>
correlations_
;
CUpti_SubscriberHandle
subscriber_
;
};
...
...
@@ -254,6 +270,8 @@ class DeviceTracerDummy : public DeviceTracer {
void
AddAnnotation
(
uint64_t
id
,
const
std
::
string
&
anno
)
{}
void
AddCPURecords
(
const
char
*
anno
,
uint64_t
start_ns
,
uint64_t
end_ns
)
{}
void
AddKernelRecords
(
uint64_t
start
,
uint64_t
end
,
uint32_t
device_id
,
uint32_t
stream_id
,
uint32_t
correlation_id
)
{}
...
...
@@ -285,5 +303,7 @@ void SetCurAnnotation(const char *anno) { cur_annotation = anno; }
void
ClearCurAnnotation
()
{
cur_annotation
=
nullptr
;
}
const
char
*
CurAnnotation
()
{
return
cur_annotation
;
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device_tracer.h
浏览文件 @
55b2d3d0
...
...
@@ -36,6 +36,12 @@ class DeviceTracer {
uint32_t
stream_id
;
uint32_t
correlation_id
;
};
struct
CPURecord
{
std
::
string
name
;
uint64_t
start_ns
;
uint64_t
end_ns
;
uint64_t
thread_id
;
};
virtual
~
DeviceTracer
()
{}
// Needs to be called once before use.
...
...
@@ -48,6 +54,9 @@ class DeviceTracer {
// human-readable annotations.
virtual
void
AddAnnotation
(
uint64_t
id
,
const
std
::
string
&
anno
)
=
0
;
virtual
void
AddCPURecords
(
const
char
*
anno
,
uint64_t
start_ns
,
uint64_t
end_ns
)
=
0
;
// Add a cuda kernel stats. `correlation_id` will be mapped to annotation
// added before for human readability.
virtual
void
AddKernelRecords
(
uint64_t
start
,
uint64_t
end
,
...
...
@@ -67,6 +76,7 @@ DeviceTracer* GetDeviceTracer();
void
SetCurAnnotation
(
const
char
*
anno
);
// Clear the name after the operation is done.
void
ClearCurAnnotation
();
// Current name of the operation being run in the thread.
const
char
*
CurAnnotation
();
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler.cc
浏览文件 @
55b2d3d0
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include <sys/time.h>
#include <time.h>
#include <iomanip>
#include <map>
#ifdef PADDLE_WITH_CUDA
...
...
@@ -52,6 +54,12 @@ inline uint64_t GetTimeInNsec() {
.
count
();
}
inline
uint64_t
PosixInNsec
()
{
struct
timeval
tv
;
gettimeofday
(
&
tv
,
nullptr
);
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
}
Event
::
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
const
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
...
...
@@ -132,8 +140,8 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
GetEventList
().
Record
(
EventKind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
:
start_ns_
(
PosixInNsec
()
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
dev_ctx_
=
dev_ctx
;
name_
=
name
;
...
...
@@ -144,6 +152,10 @@ RecordEvent::RecordEvent(const std::string& name,
RecordEvent
::~
RecordEvent
()
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
DeviceTracer
*
tracer
=
GetDeviceTracer
();
if
(
tracer
)
{
tracer
->
AddCPURecords
(
CurAnnotation
(),
start_ns_
,
PosixInNsec
());
}
ClearCurAnnotation
();
PopEvent
(
name_
,
dev_ctx_
);
}
...
...
@@ -207,15 +219,14 @@ void DisableProfiler(EventSortingKey sorted_key,
Mark
(
"_stop_profiler_"
,
nullptr
);
g_state
=
ProfilerState
::
kDisabled
;
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
ParseEvents
(
all_events
,
sorted_key
);
ResetProfiler
();
DeviceTracer
*
tracer
=
GetDeviceTracer
();
if
(
g_profiler_place
==
"All"
&&
tracer
&&
tracer
->
IsEnabled
())
{
tracer
->
Disable
();
tracer
->
GenProfile
(
profile_path
);
}
std
::
vector
<
std
::
vector
<
Event
>>
all_events
=
GetAllEvents
();
ParseEvents
(
all_events
,
sorted_key
);
ResetProfiler
();
}
void
ParseEvents
(
std
::
vector
<
std
::
vector
<
Event
>>&
events
,
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
55b2d3d0
...
...
@@ -108,6 +108,7 @@ struct RecordEvent {
~
RecordEvent
();
uint64_t
start_ns_
;
// The device context is used by Event to get the current cuda stream.
const
DeviceContext
*
dev_ctx_
;
// Event name
...
...
paddle/fluid/platform/profiler.proto
浏览文件 @
55b2d3d0
...
...
@@ -19,7 +19,8 @@ message Event {
optional
string
name
=
1
;
optional
uint64
start_ns
=
2
;
optional
uint64
end_ns
=
3
;
optional
uint32
device_id
=
5
;
// When positive, it represents gpu id. When -1, it represents CPU.
optional
int32
device_id
=
5
;
optional
uint32
stream_id
=
6
;
}
...
...
tools/timeline.py
浏览文件 @
55b2d3d0
...
...
@@ -124,15 +124,22 @@ class Timeline(object):
if
event
.
device_id
not
in
self
.
_devices
:
pid
=
self
.
_allocate_pid
()
self
.
_devices
[
event
.
device_id
]
=
pid
self
.
_chrome_trace
.
emit_pid
(
"device:%s"
%
pid
,
pid
)
if
event
.
device_id
>=
0
:
self
.
_chrome_trace
.
emit_pid
(
"gpu:%s:stream:%d"
%
(
pid
,
event
.
stream_id
),
pid
)
elif
event
.
device_id
==
-
1
:
self
.
_chrome_trace
.
emit_pid
(
"cpu:thread_hash:%d"
%
event
.
stream_id
,
pid
)
def
_allocate_events
(
self
):
for
event
in
self
.
_profile_pb
.
events
:
pid
=
self
.
_devices
[
event
.
device_id
]
args
=
{
'name'
:
event
.
name
}
self
.
_chrome_trace
.
emit_region
(
event
.
start_ns
,
(
event
.
end_ns
-
event
.
start_ns
)
/
1000000.0
,
pid
,
0
,
'Op'
,
event
.
name
,
args
)
# TODO(panyx0718): Chrome tracing only handles ms. However, some
# ops takes micro-seconds. Hence, we keep the ns here.
self
.
_chrome_trace
.
emit_region
(
event
.
start_ns
,
(
event
.
end_ns
-
event
.
start_ns
)
/
1.0
,
pid
,
0
,
'Op'
,
event
.
name
,
args
)
def
generate_chrome_trace
(
self
):
self
.
_allocate_pids
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录