Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
f95bb7b7
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
f95bb7b7
编写于
2月 04, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(mge/profiler): reduce profiler overhead
GitOrigin-RevId: 4e1fff9dc43341e1fd2cc414823cf31efd8e5831
上级
9cc732f8
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
21 addition
and
12 deletion
+21
-12
imperative/src/impl/interpreter/interpreter_impl.cpp
imperative/src/impl/interpreter/interpreter_impl.cpp
+11
-7
imperative/src/include/megbrain/imperative/profiler.h
imperative/src/include/megbrain/imperative/profiler.h
+10
-5
未找到文件。
imperative/src/impl/interpreter/interpreter_impl.cpp
浏览文件 @
f95bb7b7
...
@@ -525,20 +525,24 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
...
@@ -525,20 +525,24 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
// Before wait
// Before wait
//TODO: split operator wait and execute so that OpWait could be corrected recorded.
//TODO: split operator wait and execute so that OpWait could be corrected recorded.
// Before execute
// Before execute
if
(
m_worker_state
.
profiler
->
is_profiling
())
{
m_worker_state
.
profiler
->
record_host
<
HostOpExecuteEvent
>
(
event_data
);
m_worker_state
.
profiler
->
record_host
<
HostOpExecuteEvent
>
(
event_data
);
for
(
auto
&&
device
:
devices
)
{
for
(
auto
&&
device
:
devices
)
{
sync_device_scope
(
device
);
sync_device_scope
(
device
);
m_worker_state
.
profiler
->
record_device
<
DeviceOpExecuteEvent
>
(
device
,
event_data
);
m_worker_state
.
profiler
->
record_device
<
DeviceOpExecuteEvent
>
(
device
,
event_data
);
}
}
}
// Apply op
// Apply op
// Here std::move is REQUIRED for removing duplicated references.
// Here std::move is REQUIRED for removing duplicated references.
auto
tensor_outputs
=
OpDef
::
apply_on_physical_tensor
(
auto
tensor_outputs
=
OpDef
::
apply_on_physical_tensor
(
*
cmd
.
op
,
std
::
move
(
tensor_inputs
));
*
cmd
.
op
,
std
::
move
(
tensor_inputs
));
// After execute
// After execute
if
(
m_worker_state
.
profiler
->
is_profiling
())
{
m_worker_state
.
profiler
->
record_host
<
HostOpFinishEvent
>
(
event_data
);
m_worker_state
.
profiler
->
record_host
<
HostOpFinishEvent
>
(
event_data
);
for
(
auto
&&
device
:
devices
)
{
for
(
auto
&&
device
:
devices
)
{
m_worker_state
.
profiler
->
record_device
<
DeviceOpFinishEvent
>
(
device
,
event_data
);
m_worker_state
.
profiler
->
record_device
<
DeviceOpFinishEvent
>
(
device
,
event_data
);
}
}
}
// End profiling operator
// End profiling operator
mgb_assert
(
tensor_outputs
.
size
()
==
cmd
.
outputs
.
size
());
mgb_assert
(
tensor_outputs
.
size
()
==
cmd
.
outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
tensor_outputs
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
tensor_outputs
.
size
();
++
i
)
{
...
...
imperative/src/include/megbrain/imperative/profiler.h
浏览文件 @
f95bb7b7
...
@@ -140,27 +140,32 @@ public:
...
@@ -140,27 +140,32 @@ public:
public:
public:
template
<
typename
TEvent
,
typename
...
TArgs
>
template
<
typename
TEvent
,
typename
...
TArgs
>
void
record_host
(
TArgs
&&
...
args
)
{
void
record_host
(
TArgs
&&
...
args
)
{
auto
instant
=
HostInstant
{
std
::
this_thread
::
get_id
(),
m_host_timer
.
get_msecs
()};
MGB_LOCK_GUARD
(
m_lock
);
MGB_LOCK_GUARD
(
m_lock
);
if
(
!
m_event_mask
.
test
(
index_of
<
TEvent
>
()))
{
if
(
!
m_event_mask
.
test
(
index_of
<
TEvent
>
()))
{
return
;
return
;
}
}
mgb_assert
(
m_status
!=
Stopped
,
"record after stop"
);
mgb_assert
(
m_status
!=
Stopped
,
"record after stop"
);
auto
instant
=
HostInstant
{
std
::
this_thread
::
get_id
(),
m_host_timer
.
get_msecs
()};
m_record_list
.
emplace_back
(
EventRecord
<
TEvent
>
{
std
::
move
(
instant
),
{
std
::
forward
<
TArgs
>
(
args
)...}});
m_record_list
.
emplace_back
(
EventRecord
<
TEvent
>
{
std
::
move
(
instant
),
{
std
::
forward
<
TArgs
>
(
args
)...}});
}
}
template
<
typename
TEvent
,
typename
...
TArgs
>
template
<
typename
TEvent
,
typename
...
TArgs
>
void
record_device
(
Device
device
,
TArgs
&&
...
args
)
{
void
record_device
(
Device
device
,
TArgs
&&
...
args
)
{
auto
before
=
m_host_timer
.
get_msecs
();
auto
event
=
m_device_timer
.
get_device_time
(
device
);
auto
after
=
m_host_timer
.
get_msecs
();
auto
instant
=
DeviceInstant
{
before
,
event
,
after
};
MGB_LOCK_GUARD
(
m_lock
);
MGB_LOCK_GUARD
(
m_lock
);
if
(
!
m_event_mask
.
test
(
index_of
<
TEvent
>
()))
{
if
(
!
m_event_mask
.
test
(
index_of
<
TEvent
>
()))
{
return
;
return
;
}
}
mgb_assert
(
m_status
!=
Stopped
,
"record after stop"
);
mgb_assert
(
m_status
!=
Stopped
,
"record after stop"
);
auto
before
=
m_host_timer
.
get_msecs
();
auto
event
=
m_device_timer
.
get_device_time
(
device
);
auto
after
=
m_host_timer
.
get_msecs
();
auto
instant
=
DeviceInstant
{
before
,
event
,
after
};
m_record_list
.
emplace_back
(
EventRecord
<
TEvent
>
{
std
::
move
(
instant
),
{
std
::
forward
<
TArgs
>
(
args
)...}});
m_record_list
.
emplace_back
(
EventRecord
<
TEvent
>
{
std
::
move
(
instant
),
{
std
::
forward
<
TArgs
>
(
args
)...}});
}
}
// unsafe
bool
is_profiling
()
{
MGB_LOCK_GUARD
(
m_lock
);
return
m_status
==
Profiling
;
}
void
start
(
Mask
mask
)
{
void
start
(
Mask
mask
)
{
MGB_LOCK_GUARD
(
m_lock
);
MGB_LOCK_GUARD
(
m_lock
);
mgb_assert
(
m_status
==
NotStarted
,
"profiler already started"
);
mgb_assert
(
m_status
==
NotStarted
,
"profiler already started"
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录