Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
9d47c3ba
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
9d47c3ba
编写于
4月 26, 2021
作者:
M
Megvii Engine Team
提交者:
huangxinda
7月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(profiler): imperative profiler support tracing
GitOrigin-RevId: b247472feba6d28416f52874c3517e50a8c2bd49
上级
cdcb46ba
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
272 addition
and
4 deletion
+272
-4
imperative/python/megengine/jit/tracing.py
imperative/python/megengine/jit/tracing.py
+14
-3
imperative/python/src/graph_rt.cpp
imperative/python/src/graph_rt.cpp
+5
-1
imperative/src/impl/profiler_plugin.cpp
imperative/src/impl/profiler_plugin.cpp
+207
-0
imperative/src/include/megbrain/imperative/profiler_plugin.h
imperative/src/include/megbrain/imperative/profiler_plugin.h
+46
-0
未找到文件。
imperative/python/megengine/jit/tracing.py
浏览文件 @
9d47c3ba
...
@@ -17,7 +17,7 @@ from typing import Any
...
@@ -17,7 +17,7 @@ from typing import Any
import
numpy
as
np
import
numpy
as
np
from
..core._imperative_rt
import
GraphProfiler
,
SerializationMetadata
from
..core._imperative_rt
import
GraphProfiler
,
GraphProfiler2
,
SerializationMetadata
from
..core._imperative_rt.core2
import
Tensor
as
RawTensor
from
..core._imperative_rt.core2
import
Tensor
as
RawTensor
from
..core._imperative_rt.core2
import
(
from
..core._imperative_rt.core2
import
(
TensorWeakRef
,
TensorWeakRef
,
...
@@ -39,6 +39,7 @@ from ..core.ops.special import Const
...
@@ -39,6 +39,7 @@ from ..core.ops.special import Const
from
..core.tensor
import
megbrain_graph
as
G
from
..core.tensor
import
megbrain_graph
as
G
from
..core.tensor.utils
import
setscalar
from
..core.tensor.utils
import
setscalar
from
..utils.naming
import
AutoNaming
from
..utils.naming
import
AutoNaming
from
..utils.profiler
import
is_profiling
from
.dtr_config
import
DTRConfig
from
.dtr_config
import
DTRConfig
from
.graph_opt_config
import
GraphOptimizationConfig
from
.graph_opt_config
import
GraphOptimizationConfig
from
.sublinear_memory_config
import
SublinearMemoryConfig
from
.sublinear_memory_config
import
SublinearMemoryConfig
...
@@ -160,6 +161,7 @@ class trace:
...
@@ -160,6 +161,7 @@ class trace:
self
.
_dtr_config
=
dtr_config
self
.
_dtr_config
=
dtr_config
self
.
_profiling
=
profiling
self
.
_profiling
=
profiling
self
.
_profiler
=
None
self
.
_profiler
=
None
self
.
_profiler2
=
None
self
.
_graph_opt_level
=
opt_level
self
.
_graph_opt_level
=
opt_level
self
.
_graph_opt_config
=
graph_opt_config
self
.
_graph_opt_config
=
graph_opt_config
self
.
_symbolic_shape
=
symbolic_shape
self
.
_symbolic_shape
=
symbolic_shape
...
@@ -382,7 +384,8 @@ class trace:
...
@@ -382,7 +384,8 @@ class trace:
lazy_eval_graph
.
options
.
graph_opt_level
=
self
.
_graph_opt_level
lazy_eval_graph
.
options
.
graph_opt_level
=
self
.
_graph_opt_level
lazy_eval_graph
.
_set_priority_to_id
([
*
lazy_eval_links
,
*
readers
])
lazy_eval_graph
.
_set_priority_to_id
([
*
lazy_eval_links
,
*
readers
])
lazy_eval_graph
.
compile
(
*
lazy_eval_links
,
*
readers
)
lazy_eval_graph
.
compile
(
*
lazy_eval_links
,
*
readers
)
lazy_eval_graph
()
self
.
_execute_graph
(
lazy_eval_graph
)
lazy_eval_graph
.
wait
()
for
r
,
x
in
zip
(
readers
,
lazy_eval_tensors
):
for
r
,
x
in
zip
(
readers
,
lazy_eval_tensors
):
# get values from lazy_eval_graph and assign to lazy_eval tensor
# get values from lazy_eval_graph and assign to lazy_eval tensor
x
.
_handle
=
RawTensor
(
r
.
op
.
get_value
()).
_handle
x
.
_handle
=
RawTensor
(
r
.
op
.
get_value
()).
_handle
...
@@ -401,7 +404,7 @@ class trace:
...
@@ -401,7 +404,7 @@ class trace:
else
:
else
:
if
self
.
_graph
is
None
:
if
self
.
_graph
is
None
:
self
.
_compile
()
self
.
_compile
()
self
.
_
graph
.
execute
(
)
self
.
_
execute_graph
(
self
.
_graph
)
def
do_finalize
():
def
do_finalize
():
escaped_tensors
=
self
.
_take_escaped_tensors
()
escaped_tensors
=
self
.
_take_escaped_tensors
()
...
@@ -532,9 +535,17 @@ class trace:
...
@@ -532,9 +535,17 @@ class trace:
# profile
# profile
if
self
.
_profiling
:
if
self
.
_profiling
:
self
.
_profiler
=
GraphProfiler
(
graph
)
self
.
_profiler
=
GraphProfiler
(
graph
)
self
.
_profiler2
=
None
if
int
(
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
,
"0"
)):
if
int
(
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
,
"0"
)):
graph
.
options
.
var_sanity_check_first_run
=
False
graph
.
options
.
var_sanity_check_first_run
=
False
def _execute_graph(self, graph: G.Graph, *args):
    """Execute ``graph`` with ``args``, keeping ``self._profiler2`` in step
    with the current result of ``is_profiling()``.

    A ``GraphProfiler2`` is created for ``graph`` when profiling is on and
    none is currently held; the held one is dropped when profiling is off.
    """
    plugin_attached = self._profiler2 is not None
    if is_profiling() and not plugin_attached:
        self._profiler2 = GraphProfiler2(graph)
    elif not is_profiling() and plugin_attached:
        self._profiler2 = None
    graph.execute(*args)
def
_compile
(
self
):
def
_compile
(
self
):
graph
=
self
.
_graph
=
G
.
Graph
()
graph
=
self
.
_graph
=
G
.
Graph
()
graph
.
options
.
async_exec_level
=
0b100
graph
.
options
.
async_exec_level
=
0b100
...
...
imperative/python/src/graph_rt.cpp
浏览文件 @
9d47c3ba
...
@@ -23,7 +23,7 @@
...
@@ -23,7 +23,7 @@
#include "./common.h"
#include "./common.h"
#include "./ops.h"
#include "./ops.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/imperative/profiler_plugin.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
...
@@ -239,6 +239,10 @@ void init_graph_rt(py::module m) {
...
@@ -239,6 +239,10 @@ void init_graph_rt(py::module m) {
}))
}))
.
def
(
"get"
,
[](
_CompGraphProfilerImpl
&
profiler
)
{
return
profiler
.
_get_result
();
});
.
def
(
"get"
,
[](
_CompGraphProfilerImpl
&
profiler
)
{
return
profiler
.
_get_result
();
});
using
interpreter
::
intl
::
ProfilerPlugin
;
py
::
class_
<
ProfilerPlugin
,
std
::
shared_ptr
<
ProfilerPlugin
>>
(
m
,
"GraphProfiler2"
)
.
def
(
py
::
init
<
cg
::
ComputingGraph
*>
());
auto
GraphOptimizeOptions
=
py
::
class_
<
_OptimizeForInferenceOptions
>
(
m
,
"GraphOptimizeOptions"
)
auto
GraphOptimizeOptions
=
py
::
class_
<
_OptimizeForInferenceOptions
>
(
m
,
"GraphOptimizeOptions"
)
.
def
(
py
::
init
())
.
def
(
py
::
init
())
.
def
(
"serialize"
,
&
_OptimizeForInferenceOptions
::
serialize
)
.
def
(
"serialize"
,
&
_OptimizeForInferenceOptions
::
serialize
)
...
...
imperative/src/impl/profiler_plugin.cpp
0 → 100644
浏览文件 @
9d47c3ba
/**
* \file imperative/src/impl/profiler_plugin.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megbrain/imperative/profiler_plugin.h"
#include "megbrain/graph.h"
#include "megbrain/graph/event.h"
#include "./profiler/events.h"
namespace
mgb
::
imperative
::
interpreter
::
intl
{
/*!
 * \brief Attach the profiler plugin to \p graph.
 *
 * Builds one handler per computing-graph event and registers them on the
 * graph's event connector. Every handler translates the graph event into
 * records emitted through Profiler::record<...>().
 */
ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph) : PluginBase(graph) {
    using namespace cg;
    using namespace cg::event;
    using namespace profiler;

    // CompSeqExecBeforeStart: lazily build the opr/var tables, then record the
    // dispatch of every operator and the declaration/production of constants.
    auto handle_seq_start = [this](CompSeqExecBeforeStart const& ev) {
        mgb_assert(!ev.graph->options().imperative_proxy_graph);
        // both tables are emptied by the CompSeqOrderDetermined handler below
        if (m_opr_dict.empty() && m_var_dict.empty()) {
            init_seq(ev.exec);
        }
        Profiler::record<ScopeEvent>("DispatchOprs");
        ev.exec->iter_opr_seq([this](OperatorNodeBase* opr) -> bool {
            auto& info = get_opr_info(opr);
            SmallVector<uint64_t> input_ids;
            for (auto in_var : opr->input()) {
                input_ids.push_back(get_var_info(in_var).id);
            }
            SmallVector<uint64_t> output_ids;
            for (auto out_var : opr->output()) {
                output_ids.push_back(get_var_info(out_var).id);
            }
            auto type_name = opr->dyn_typeinfo()->name;
            // the callable shares ownership of the params map and returns a copy of it
            auto params_getter = [params = info.params] { return *params; };
            Profiler::record<OpDispatchEvent>(
                    info.id, type_name, params_getter, input_ids, output_ids);
            for (auto out_var : opr->output()) {
                auto declared_id = get_var_info(out_var).id;
                Profiler::record<TensorDeclareEvent>(declared_id);
            }
            return true;
        });
        Profiler::record<ScopeFinishEvent>("DispatchOprs");
        Profiler::record<ScopeEvent>("Constants");
        for (auto&& [var, var_info] : m_var_dict) {
            if (!var_info->is_const) {
                // non-const vars: restart the runtime counter from the static use count
                var_info->rt_ref_cnt = var_info->ref_cnt;
                continue;
            }
            bool valid = var->dev_tensor_valid();
            auto layout = valid ? var->layout() : TensorLayout();
            Profiler::record<TensorDeclareEvent>(var_info->id);
            Profiler::record<TensorProduceEvent>(
                    var_info->id, layout, var->comp_node(),
                    valid ? var->dev_tensor().raw_ptr() : nullptr);
        }
        Profiler::record<ScopeFinishEvent>("Constants");
    };

    // OprExecStart: record the execute event and one input/usage triple per
    // input var, each handed to dispatch_on_comp_node for the opr's comp node.
    auto handle_opr_start = [this](OprExecStart const& ev) {
        OperatorNodeBase* opr = ev.opr;
        auto& info = get_opr_info(opr);
        auto cn = info.comp_node;
        auto mark_execute = [&info] {
            Profiler::record<OpExecuteEvent>(info.id);
        };
        ev.env->dispatch_on_comp_node(cn, mark_execute);
        auto in_vars = opr->input();
        for (auto&& in_var : in_vars) {
            auto& in_info = get_var_info(in_var);
            auto mark_input = [&in_info, in_var] {
                auto inp_id = in_info.id;
                Profiler::record<OpInputEvent>(inp_id, in_var->shape());
                Profiler::record<TensorUsageEvent>(inp_id);
                Profiler::record<OpInputFinishEvent>(inp_id, in_var->shape());
            };
            ev.env->dispatch_on_comp_node(cn, mark_input);
        }
    };

    // OprExecKernelEnd: release inputs whose runtime counter reaches zero,
    // record output production, then record the end of the operator.
    auto handle_opr_finish = [this](OprExecKernelEnd const& ev) {
        OperatorNodeBase* opr = ev.opr;
        auto& info = get_opr_info(opr);
        auto cn = info.comp_node;
        auto in_vars = opr->input();
        auto out_vars = opr->output();
        for (auto in_var : in_vars) {
            auto& in_info = get_var_info(in_var);
            auto release_input = [&in_info] {
                if (!in_info.is_const && --in_info.rt_ref_cnt == 0) {
                    Profiler::record<TensorReleaseEvent>(in_info.id);
                }
            };
            ev.env->dispatch_on_comp_node(cn, release_input);
        }
        for (auto out_var : out_vars) {
            auto& out_info = get_var_info(out_var);
            mgb_assert(cn == out_var->comp_node(), "opr comp_node mismatch");
            auto mark_output = [&out_info, out_var] {
                auto out_id = out_info.id;
                bool valid = out_var->dev_tensor_valid();
                auto layout = valid ? out_var->layout() : TensorLayout();
                Profiler::record<OpOutputEvent>(out_id, out_var->shape());
                Profiler::record<TensorProduceEvent>(
                        out_id, layout, out_var->comp_node(),
                        valid ? out_var->dev_tensor().raw_ptr() : nullptr);
                // an output with a zero static use count is released right away
                if (!out_info.ref_cnt) {
                    Profiler::record<TensorReleaseEvent>(out_info.id);
                }
                Profiler::record<OpOutputFinishEvent>(out_id, out_var->shape());
            };
            ev.env->dispatch_on_comp_node(cn, mark_output);
        }
        auto mark_finish = [&info]() {
            Profiler::record<OpExecuteFinishEvent>(info.id);
        };
        ev.env->dispatch_on_comp_node(cn, mark_finish);
    };

    // BeforeKernel / AfterKernel: bracket the kernel with timer events; the
    // operator id is passed for both leading id arguments of the record.
    auto handle_before_kern = [this](BeforeKernel const& ev) {
        OperatorNodeBase* opr = ev.opr;
        Profiler::record<KernelExecuteEvent>(
                get_opr_info(opr).id, get_opr_info(opr).id,
                Timer::record_event(ev.comp_node));
    };
    auto handle_after_kern = [this](AfterKernel const& ev) {
        OperatorNodeBase* opr = ev.opr;
        Profiler::record<KernelExecuteFinishEvent>(
                get_opr_info(opr).id, get_opr_info(opr).id,
                Timer::record_event(ev.comp_node));
    };

    // CompSeqOrderDetermined: drop both tables so the next sequence start
    // rebuilds them through init_seq().
    auto handle_graph_compile = [this](const CompSeqOrderDetermined&) {
        m_opr_dict.clear();
        m_var_dict.clear();
    };

    // CompSeqExecFinished: release constants and erase every known var.
    auto handle_seq_finish = [this](CompSeqExecFinished const& ev) {
        for (auto&& [var, var_info] : m_var_dict) {
            MGB_MARK_USED_VAR(var);
            if (var_info->is_const) {
                Profiler::record<TensorReleaseEvent>(var_info->id);
            }
            Profiler::record<TensorEraseEvent>(var_info->id, var_info->ref_cnt);
        }
    };

    // registration order is kept exactly as in the original implementation
    add_event_handler(graph->event().register_receiver<CompSeqExecBeforeStart>(handle_seq_start));
    add_event_handler(graph->event().register_receiver<OprExecStart>(handle_opr_start));
    add_event_handler(graph->event().register_receiver<OprExecKernelEnd>(handle_opr_finish));
    add_event_handler(graph->event().register_receiver<BeforeKernel>(handle_before_kern));
    add_event_handler(graph->event().register_receiver<AfterKernel>(handle_after_kern));
    add_event_handler(graph->event().register_receiver<CompSeqOrderDetermined>(handle_graph_compile));
    add_event_handler(graph->event().register_receiver<CompSeqExecFinished>(handle_seq_finish));
}
/*!
 * \brief Populate the opr/var tables from the operators of \p comp_seq.
 *
 * Both tables must be empty on entry. Every operator is registered; an input
 * var not seen before is registered and flagged as const, while an already
 * known input has its static use count incremented. Every output var is
 * registered as non-const.
 */
void ProfilerPlugin::init_seq(cg::AsyncExecutable* comp_seq) {
    mgb_assert(m_opr_dict.empty());
    mgb_assert(m_var_dict.empty());
    comp_seq->iter_opr_seq([this](cg::OperatorNodeBase* opr) {
        auto comp_nodes = get_opr_comp_node_set(opr);
        mgb_assert(comp_nodes.size() == 1);
        register_opr(opr);
        for (auto&& in_var : opr->input()) {
            auto known = m_var_dict.find(in_var);
            if (known == m_var_dict.end()) {
                // first sighting as an input: no registered opr produced it
                register_var(in_var).is_const = true;
            } else {
                known->second->ref_cnt++;
            }
        }
        for (auto&& out_var : opr->output()) {
            register_var(out_var).is_const = false;
        }
        //TODO: check ref_cnt
        return true;
    });
}
/*!
 * \brief Create the OprInfo record for \p opr and store it in m_opr_dict.
 *
 * The record gets a fresh profiler id, a string map built from the entries of
 * the operator's JSON object, and the operator's single comp node.
 *
 * \return reference to the entry stored in m_opr_dict for \p opr
 */
ProfilerPlugin::OprInfo& ProfilerPlugin::register_opr(cg::OperatorNodeBase* opr) {
    OprInfo record;
    record.id = Profiler::next_id();

    // flatten the JSON object into key -> serialized-value strings
    auto param_map = std::make_shared<std::unordered_map<std::string, std::string>>();
    auto opr_json = opr->to_json();
    for (auto&& [key, value] : opr_json->cast_final<json::Object>().get_impl()) {
        param_map->insert({key.get_impl(), value->to_string()});
    }
    record.params = std::move(param_map);

    auto cn_set = cg::get_opr_comp_node_set(opr);
    mgb_assert(cn_set.size() == 1, "only support single comp_node opr");
    record.comp_node = *cn_set.begin();

    auto inserted = m_opr_dict.insert({opr, record});
    return inserted.first->second;
}
/*!
 * \brief Create a VarInfo record for \p var and store it in m_var_dict.
 *
 * The new record has a fresh profiler id, is_const == false and both
 * counters set to zero.
 *
 * \return reference to the VarInfo held by m_var_dict for \p var
 */
ProfilerPlugin::VarInfo& ProfilerPlugin::register_var(cg::VarNode* var) {
    auto record = std::make_unique<VarInfo>();
    record->id = Profiler::next_id();
    record->is_const = false;
    record->ref_cnt = 0;
    record->rt_ref_cnt = 0;
    auto inserted = m_var_dict.insert({var, std::move(record)});
    return *inserted.first->second;
}
/*!
 * \brief Look up the OprInfo registered for \p opr.
 *
 * Uses unordered_map::at(), so an unregistered operator raises
 * std::out_of_range.
 */
ProfilerPlugin::OprInfo& ProfilerPlugin::get_opr_info(cg::OperatorNodeBase* opr) {
    auto& entry = m_opr_dict.at(opr);
    return entry;
}
/*!
 * \brief Look up the VarInfo registered for \p var.
 *
 * Uses unordered_map::at(), so an unregistered var raises
 * std::out_of_range.
 */
ProfilerPlugin::VarInfo& ProfilerPlugin::get_var_info(cg::VarNode* var) {
    auto& owned = m_var_dict.at(var);
    return *owned;
}
}
imperative/src/include/megbrain/imperative/profiler_plugin.h
0 → 100644
浏览文件 @
9d47c3ba
/**
 * \file imperative/src/include/megbrain/imperative/profiler_plugin.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megbrain/plugin/base.h"
#include "megbrain/imperative/profiler.h"
namespace
mgb
::
imperative
::
interpreter
::
intl
{
/*!
 * \brief Computing-graph plugin that turns graph execution events into
 *      imperative profiler records.
 */
class ProfilerPlugin : public PluginBase {
public:
    //! Per-operator bookkeeping kept in m_opr_dict.
    struct OprInfo {
        //! profiler id, taken from Profiler::next_id() in register_opr()
        uint64_t id;
        //! the single comp node of the operator
        CompNode comp_node;
        //! string map built from the operator's JSON object
        std::shared_ptr<std::unordered_map<std::string, std::string>> params;
    };

    //! Per-var bookkeeping kept (heap-allocated) in m_var_dict.
    struct VarInfo {
        //! profiler id, taken from Profiler::next_id() in register_var()
        uint64_t id;
        //! set by init_seq() for vars first seen as an operator input
        bool is_const;
        //! incremented by init_seq() when an already registered var is used as input
        size_t ref_cnt;
        //! reset from ref_cnt at sequence start, decremented as operators finish
        std::atomic_size_t rt_ref_cnt;
    };

    //! Register the profiler event handlers on \p graph.
    explicit ProfilerPlugin(cg::ComputingGraph* graph);

    //! Fill both tables from the operators of \p comp_seq; tables must be empty.
    void init_seq(cg::AsyncExecutable* comp_seq);

    //! Create and store the OprInfo for \p opr.
    OprInfo& register_opr(cg::OperatorNodeBase* opr);

    //! Create and store the VarInfo for \p var.
    VarInfo& register_var(cg::VarNode* var);

    //! Fetch the stored OprInfo for \p opr (lookup through unordered_map::at).
    OprInfo& get_opr_info(cg::OperatorNodeBase* opr);

    //! Fetch the stored VarInfo for \p var (lookup through unordered_map::at).
    VarInfo& get_var_info(cg::VarNode* var);

private:
    std::unordered_map<cg::OperatorNodeBase*, OprInfo> m_opr_dict;
    std::unordered_map<cg::VarNode*, std::unique_ptr<VarInfo>> m_var_dict;
};
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录