Commit dbb3dd68
Authored on Jan 25, 2021 by Megvii Engine Team

refactor(profiler): integrate profiler into interpreter

GitOrigin-RevId: ccc984acbdc138390745e96b9b7cdd61a2737acd
Parent: ff05667b
Showing 30 changed files with 2,058 additions and 607 deletions (+2058 / -607).
Changed files:

imperative/python/megengine/autodiff/grad_manager.py (+3, -0)
imperative/python/megengine/core/__init__.py (+12, -0)
imperative/python/megengine/module/module.py (+12, -0)
imperative/python/megengine/optimizer/optimizer.py (+5, -0)
imperative/python/megengine/utils/profiler.py (+50, -213)
imperative/python/src/tensor.cpp (+23, -5)
imperative/python/src/utils.cpp (+0, -27)
imperative/python/test/integration/test_profiler.py (+54, -0)
imperative/src/impl/function_hook.h (+17, -32)
imperative/src/impl/interpreter/commands.h (+231, -0)
imperative/src/impl/interpreter/events.h (+92, -0)
imperative/src/impl/interpreter/interpreter_impl.cpp (+226, -76)
imperative/src/impl/interpreter/interpreter_impl.h (+205, -0)
imperative/src/impl/interpreter/option_manager.h (+61, -0)
imperative/src/impl/interpreter/profiler.cpp (+280, -0)
imperative/src/impl/interpreter/profiler.h (+97, -0)
imperative/src/impl/interpreter/tensor_info.h (+135, -0)
imperative/src/impl/op_def.cpp (+20, -0)
imperative/src/impl/op_trait.h (+3, -0)
imperative/src/impl/ops/backward_graph.cpp (+6, -0)
imperative/src/impl/ops/opr_attr.cpp (+5, -0)
imperative/src/impl/profiler.cpp (+22, -178)
imperative/src/impl/proxy_graph/mini_graph.h (+1, -0)
imperative/src/include/megbrain/imperative/interpreter.h (+9, -5)
imperative/src/include/megbrain/imperative/op_def.h (+18, -0)
imperative/src/include/megbrain/imperative/profiler.h (+279, -68)
imperative/src/include/megbrain/imperative/utils/to_string.h (+125, -0)
imperative/tablegen/autogen.cpp (+16, -1)
imperative/tablegen/helper.h (+44, -2)
src/core/include/megbrain/ir/base.td (+7, -0)
imperative/python/megengine/autodiff/grad_manager.py

```diff
@@ -3,6 +3,7 @@ from collections import defaultdict
 from contextlib import contextmanager
 from typing import Callable
 
+from ..core._imperative_rt.core2 import pop_scope, push_scope
 from ..core.autodiff.grad import Grad
 from ..logger import get_logger
 from ..tensor import Tensor
@@ -239,6 +240,7 @@ class GradManager:
         :param y: tensor or list of tensors
         :param dy: tensor or list of tensors. Defaults to 1 if y is scalar
         """
+        push_scope("backward")
         from ..functional import ones_like
 
         global backwarding_grad_manager
@@ -280,6 +282,7 @@ class GradManager:
         finally:
             self.release()
             backwarding_grad_manager = cache
+            pop_scope("backward")
 
     def record(self):
         r"""
```
imperative/python/megengine/core/__init__.py

```diff
@@ -8,5 +8,17 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import os
 import sys
+from contextlib import contextmanager
 
+from ._imperative_rt.core2 import get_option, set_option
 from .tensor.megbrain_graph import Graph
+
+
+@contextmanager
+def option(key, value):
+    value = int(value)
+    old = get_option(key)
+    set_option(key, value)
+    yield
+    assert get_option(key) == value
+    set_option(key, old)
```
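For reference, a minimal sketch of how the new `option` helper could be used; the option name `"async_level"` is taken from the options registered in `option_manager.h` later in this commit, and the specific values used here are illustrative:

```python
from megengine import tensor
from megengine.core import option

# Temporarily switch the interpreter to fully synchronous execution
# ("async_level" 0); the previous value is restored when the block exits.
with option("async_level", 0):
    # ops issued here report both device- and user-side errors synchronously,
    # which is convenient for debugging
    y = tensor([1.0, 2.0]) * 2
print(y.numpy())
```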
imperative/python/megengine/module/module.py

```diff
@@ -12,6 +12,7 @@ from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union
 import numpy as np
 
+from ..core._imperative_rt.core2 import pop_scope, push_scope
 from ..core.tensor.utils import make_shape_tuple
 from ..logger import get_logger
 from ..tensor import Parameter, Tensor
@@ -78,6 +79,7 @@ class Module(metaclass=ABCMeta):
         self._forward_hooks = OrderedDict()
 
         self._modules = []
+        self._name = "{anonymous}"
 
     @abstractmethod
     def forward(self, inputs):
@@ -103,6 +105,7 @@ class Module(metaclass=ABCMeta):
         return HookHandler(self._forward_hooks, hook)
 
     def __call__(self, *inputs, **kwargs):
+        push_scope(self._name)
         for hook in self._forward_pre_hooks.values():
             modified_inputs = hook(self, inputs)
             if modified_inputs is not None:
@@ -116,6 +119,7 @@ class Module(metaclass=ABCMeta):
             modified_outputs = hook(self, inputs, outputs)
             if modified_outputs is not None:
                 outputs = modified_outputs
+        pop_scope(self._name)
         return outputs
 
     def _flatten(
@@ -571,6 +575,14 @@ class Module(metaclass=ABCMeta):
         return set(loaded), set(skipped)
 
+    def __getattribute__(self, name: str):
+        value = super().__getattribute__(name)
+        if name == "_name":
+            return value
+        if _is_module(value):
+            value._name = name
+        return value
+
     def __setattr__(self, name: str, value):
         if _is_module(value):
             modules = self.__dict__.get("_modules")
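A minimal sketch of the effect of these hooks (the file prefix `"module_scope_demo"` is illustrative): every module call is now wrapped in a profiler scope named after the module, so forward passes show up as named ranges in the dumped timeline.

```python
from megengine import Parameter, tensor
from megengine.module import Module
from megengine.utils.profiler import Profiler


class Net(Module):
    def __init__(self):
        super().__init__()
        self.scale = Parameter([2.0], dtype="float32")

    def forward(self, x):
        return x * self.scale


net = Net()
with Profiler("module_scope_demo"):
    net(tensor([1.0, 2.0], dtype="float32"))

# The dumped chrome timeline contains a scope for the call above. A top-level
# module keeps the default name "{anonymous}"; submodules accessed as
# attributes of a parent module are scoped by the attribute name, which is
# assigned lazily in __getattribute__ above.
```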
imperative/python/megengine/optimizer/optimizer.py

```diff
@@ -15,6 +15,7 @@ from typing import Union
 import numpy as np
 
+from ..core._imperative_rt.core2 import pop_scope, push_scope
 from ..core.tensor.utils import set_convert_inputs
 from ..tensor import Parameter, Tensor
 from ..utils.deprecation import deprecated
@@ -155,7 +156,9 @@ class Optimizer(metaclass=ABCMeta):
                     "but the ordering of parameters in sets will change between runs. "
                     "Please use a list instead."
                 )
+            push_scope("step")
             self._updates(group)
+            pop_scope("step")
         # restore the globle state `_enable_convert_inputs`
         set_convert_inputs(backup)
         return self
@@ -172,8 +175,10 @@ class Optimizer(metaclass=ABCMeta):
         Set the grad attribute to None for all parameters.
         """
         for param_group in self.param_groups:
+            push_scope("clear_grad")
             for param in param_group["params"]:
                 param.grad = None
+            pop_scope("clear_grad")
 
     def state_dict(self) -> Dict:
         r"""
```
imperative/python/megengine/utils/profiler.py

```diff
@@ -6,159 +6,17 @@
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import base64
 import json
-import os
-import re
-from typing import Iterable, List, Optional
+from contextlib import contextmanager
+from typing import List
 
-from ..core._imperative_rt import OperatorNodeConfig, ProfileEntry
-from ..core._imperative_rt import ProfilerImpl as _Profiler
-from ..core._imperative_rt.core2 import sync
-from ..core._imperative_rt.ops import CollectiveComm
-
-
-def _make_dict(**kwargs):
-    unused_keys = []
-    for k, v in kwargs.items():
-        if v is None:
-            unused_keys.append(k)
-    for k in unused_keys:
-        del kwargs[k]
-    return kwargs
-
-
-def _print_opnode_config(config):
-    return _make_dict(
-        name=config.name,
-        dtype=config.dtype,
-        comp_node_arr=config.comp_node_arr,
-    )
-
-
-def _dump_chrome_timeline(entries: List[ProfileEntry], path: str):
-    pid = os.getpid()
-    trace_events = []
-
-    def append_event(**kwargs):
-        trace_events.append(_make_dict(**kwargs))
-
-    for id, entry in enumerate(entries):
-        op = entry.op
-        name = type(op).__name__
-        host_begin, host_end = entry.host
-        device_list = entry.device_list
-        args = Profiler.fetch_attrs(op)
-        args["__id__"] = "[{}]".format(id)
-        cat = name
-        for ts, ph in [(host_begin, "B"), (host_end, "E")]:
-            append_event(
-                name=name, ph=ph, ts=ts * 1000, pid=pid, tid="host", args=args, cat=cat,
-            )
-        for device, device_begin, device_end in device_list:
-            for ts, ph in [(device_begin(), "B"), (device_end(), "E")]:
-                append_event(
-                    name=name, ph=ph, ts=ts * 1000, pid=pid, tid=str(device), args=args,
-                )
-    with open("{}.chrome_timeline.json".format(path), "w") as f:
-        json.dump(trace_events, f, indent=2)
-
-
-def _dump_compatible(entries: List[ProfileEntry], path: str):
-    obj = {
-        "graph_exec": {"var": [], "operator": {}},
-        "profiler": {"device": {}, "host": {}, "opr_footprint": {}},
-    }
-    var_list = obj["graph_exec"]["var"]
-    operator_dict = obj["graph_exec"]["operator"]
-    device_dict = obj["profiler"]["device"]
-    host_dict = obj["profiler"]["host"]
-    opr_foot_print_dict = obj["profiler"]["opr_footprint"]
-
-    def add_var(var) -> int:
-        var_id = len(var_list)
-        var_list.append({"comp_node": str(var[2]),})
-        return var_id
-
-    for op_id, entry in enumerate(entries):
-        operator_dict[op_id] = {
-            "input": [add_var(var) for var in entry.inputs],
-            "output": [add_var(var) for var in entry.outputs],
-            "name": str(entry.op.ctype()),
-            "type": "imperative",
-            "id": entry.id,
-        }
-        op_device_dict = {}
-        for device, device_begin, device_end in entry.device_list:
-            op_device_dict[str(device)] = {
-                "start": device_begin(),
-                "kern": device_begin(),
-                "end": device_end(),
-            }
-        device_dict[op_id] = op_device_dict
-        host_begin, host_end = entry.host
-        host_dict[op_id] = {
-            "host": {"start": host_begin, "kern": host_begin, "end": host_end}
-        }
-        opr_footprint = {
-            "out_shapes": [oup[1] for oup in entry.outputs],
-            "in_shapes": [inp[1] for inp in entry.inputs],
-            "params": {},
-        }
-        if entry.memory > 0:
-            opr_footprint["memory"] = entry.memory
-        if entry.computation > 0:
-            opr_footprint["computation"] = entry.computation
-        opr_foot_print_dict[op_id] = opr_footprint
-    with open("{}.compatible.json".format(path), "w") as f:
-        json.dump(obj, f, indent=2)
-
-
-def _dump_graphviz(entries: List[ProfileEntry], path: str):
-    import json
-    import graphviz
-
-    graph = graphviz.Digraph()
-    graph.graph_attr["ordering"] = "out"
-    var_cache = {}
-
-    def cache_var(var_id, var_shape):
-        if var_id not in var_cache:
-            var_name = "var({})".format(var_id)
-            var_label = "{}\nshape:{}\n".format(var_name, shape)
-            graph.node(var_name, var_label)
-            var_cache[var_id] = var_name
-        return var_cache[var_id]
-
-    for op_id, entry in enumerate(entries):
-        op = entry.op
-        op_name = "op({})".format(op_id)
-        op_type = type(op).__name__
-        op_attrs = Profiler.fetch_attrs(op)
-        label_lines = []
-        if "param" in op_attrs:
-            del op_attrs["param"]
-        label_lines.append("{}:{}".format(op_name, op_type))
-        for k, v in op_attrs.items():
-            label_lines.append("attr[{}]: {}".format(k, v))
-        op_param_str = entry.param
-        if len(op_param_str) > 0:
-            op_param = json.loads(op_param_str)
-            for k, v in op_param.items():
-                label_lines.append("param[{}]:{}".format(k, v))
-        host_begin, host_end = entry.host
-        label_lines.append("time[host]: {:f}ms".format(host_end - host_begin))
-        for device, device_begin, device_end in entry.device_list:
-            device_time = device_end() - device_begin()
-            label_lines.append("time[{}]: {:f}ms".format(device, device_time))
-        op_label = "\n".join(label_lines)
-        graph.node(op_name, op_label, shape="rectangle")
-        for var_id, shape, device in entry.inputs:
-            graph.edge(cache_var(var_id, shape), op_name)
-        for var_id, shape, device in entry.outputs:
-            graph.edge(op_name, cache_var(var_id, shape))
-    graph.save("{}.graphviz.dot".format(path))
+from ..core._imperative_rt.core2 import (
+    pop_scope,
+    push_scope,
+    start_profile,
+    stop_profile,
+    sync,
+)
 
 
 class Profiler:
@@ -181,85 +39,45 @@ class Profiler:
     # Only profile record of last iter would be saved
     with Profiler("profile"):
         # your code here
 
     # Then open the profile file in chrome timeline window
     """
 
-    CHROME_TIMELINE = "chrome_timeline"
-    COMPATIBLE = "compatible"
-    GRAPHVIZ = "graphviz"
-
-    WITH_FOOTPRINT = 1
-
-    _type_map = {
-        OperatorNodeConfig: lambda x: _print_opnode_config(x),
-        bytes: lambda x: base64.encodebytes(x).decode("ascii"),
-        CollectiveComm.Mode: lambda x: str(x),
-    }
-
-    _dumper_map = {
-        CHROME_TIMELINE: _dump_chrome_timeline,
-        COMPATIBLE: _dump_compatible,
-        GRAPHVIZ: _dump_graphviz,
-    }
+    CHROME_TIMELINE = "chrome_timeline.json"
+
+    COMMAND = 1 << 0
+    OPERATOR = 1 << 1
+    TENSOR_LIFETIME = 1 << 2
+    TENSOR_PROP = 1 << 3
+    SYNC = 1 << 4
+    SCOPE = 1 << 5
+    ALL = (1 << 6) - 1
 
     def __init__(
         self,
         path: str = "profile",
+        format: str = CHROME_TIMELINE,
         *,
-        formats: Iterable[str] = (CHROME_TIMELINE,),
-        type_filter: str = ".*",
-        exit_dump: bool = True
+        topic=OPERATOR | SCOPE,
+        align_time=True,
+        show_operator_name=True
     ) -> None:
-        self._impl = _Profiler()
         self._path = path
-        if isinstance(formats, str):
-            formats = (formats,)
-
-        self._filter = type_filter
-        self._dumpers = [Profiler._dumper_map[fmt] for fmt in formats]
-        self._exit_dump = exit_dump
+        self._format = format
+        self._options = {
+            "topic": int(topic),
+            "align_time": int(align_time),
+            "show_operator_name": int(show_operator_name),
+        }
 
     def __enter__(self):
-        sync()
-        self._impl.start(Profiler.WITH_FOOTPRINT)
+        start_profile(self._options)
         return self
 
     def __exit__(self, val, tp, trace):
-        if self._exit_dump:
-            self.dump()
-        sync()
-        self._impl.stop()
-        self._impl.clear()
-
-    @classmethod
-    def fetch_attrs(cls, op):
-        attrs = dir(op)
-        results = {}
-        for attr in attrs:
-            if attr.startswith("_"):
-                continue
-            value = op.__getattribute__(attr)
-            if callable(value):
-                continue
-            value_type = type(value)
-            if value_type in cls._type_map:
-                value = cls._type_map[value_type](value)
-            results[attr] = str(value)
-        return results
-
-    def dump(self, path: Optional[str] = None):
-        sync()
-        raw = [
-            entry
-            for entry in self._impl.dump()
-            if re.match(self._filter, type(entry.op).__name__)
-        ]
-        if path is None:
-            path = self._path
-        for dumper in self._dumpers:
-            dumper(raw, path)
+        stop_profile(self._path, self._format)
+        # dump is async, so it's necessary to sync interpreter
+        sync()
 
     def __call__(self, func):
         def wrapper(*args, **kwargs):
@@ -269,4 +87,23 @@ class Profiler:
         return wrapper
 
 
+@contextmanager
+def scope(name):
+    push_scope(name)
+    yield
+    pop_scope(name)
+
+
 profile = Profiler
+
+
+def merge_trace_events(sources: List[str], target: str):
+    names = list(map(lambda x: x + ".chrome_timeline.json", sources))
+    result = []
+    for name in names:
+        with open(name, "r", encoding="utf-8") as f:
+            content = json.load(f)
+            for entry in content:
+                result.append(entry)
+    with open(target + ".chrome_timeline.json", "w") as f:
+        json.dump(result, f, ensure_ascii=False, indent=4)
```
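A minimal usage sketch of the reworked Python API (the file prefixes `"train"` and `"train_merged"` are illustrative): profiling is now started and stopped inside the interpreter via `start_profile`/`stop_profile`, with `topic` selecting which event categories are recorded.

```python
import megengine.functional as F
from megengine import tensor
from megengine.utils.profiler import Profiler, merge_trace_events, scope

# Record operator and scope events of the wrapped code into
# "train.chrome_timeline.json"; open it in chrome://tracing to inspect.
with Profiler("train", topic=Profiler.OPERATOR | Profiler.SCOPE):
    with scope("iter0"):
        x = tensor([1.0, 2.0, 3.0])
        y = F.relu(x - 1)

# Timelines recorded separately (e.g. one per process in data-parallel
# training) can be concatenated into a single trace file.
merge_trace_events(["train"], "train_merged")
```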
imperative/python/src/tensor.cpp

```diff
@@ -807,16 +807,34 @@ void init_tensor(py::module m) {
         }
     }
 
+    m.def("set_option",
+          [](std::string name, int value){ interpreter_for_py->set_option(name, value); });
+    m.def("get_option",
+          [](std::string name){ return interpreter_for_py->get_option(name); });
     m.def("_set_swap_flag",
-          [](bool flag) { interpreter_for_py->set_swap_flag(flag); });
+          [](bool flag) { interpreter_for_py->set_option("enable_swap", flag); });
     m.def("_set_drop_flag",
-          [](bool flag) { interpreter_for_py->set_drop_flag(flag); });
+          [](bool flag) { interpreter_for_py->set_option("enable_drop", flag); });
     m.def("config_async_level",
-          [](int level) { interpreter_for_py->config_async_level(level); });
+          [](int level) {
+              mgb_assert(level >= 0 and level <= 2, "async_level should be 0, 1 or 2");
+              interpreter_for_py->set_option("async_level", level);
+          });
     m.def("get_async_level",
-          []() { return interpreter_for_py->get_async_level(); });
+          []() { return interpreter_for_py->get_option("async_level"); });
     m.def("set_buffer_length",
-          [](int length) { interpreter_for_py->set_buffer_length(length); });
+          [](int length) {
+              mgb_assert(length >= 0 and length < 100, "buffer_length should be in [0, 100)");
+              interpreter_for_py->set_option("buffer_length", length);
+          });
+    m.def("push_scope",
+          [](std::string name) { interpreter_for_py->push_scope(name); });
+    m.def("pop_scope",
+          [](std::string name) { interpreter_for_py->pop_scope(name); });
+    m.def("start_profile",
+          [](std::unordered_map<std::string, int> option) { return interpreter_for_py->start_profile(option); });
+    m.def("stop_profile",
+          [](std::string basename, std::string format) { interpreter_for_py->stop_profile(basename, format); });
     m.def("sync",
           []() {
               interpreter_for_py->sync();
```
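A minimal sketch of the Python-side surface these bindings expose (the scope name `"data_preprocess"` is illustrative); options are now addressed by name rather than through dedicated setters:

```python
from megengine.core._imperative_rt.core2 import (
    get_option,
    pop_scope,
    push_scope,
    set_option,
)

# Read, change and restore an interpreter option by name.
old = get_option("async_level")
set_option("async_level", 0)   # fully synchronous execution
assert get_option("async_level") == 0
set_option("async_level", old)

# Scopes nest and appear as named ranges in the profiler timeline.
push_scope("data_preprocess")
# ... build tensors, run ops ...
pop_scope("data_preprocess")
```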
imperative/python/src/utils.cpp

```diff
@@ -200,33 +200,6 @@ void init_utils(py::module m) {
     m.def("_get_device_count", &mgb::CompNode::get_device_count,
           "Get total number of specific devices on this system");
 
-    using mgb::imperative::ProfileEntry;
-
-    py::class_<ProfileEntry>(m, "ProfileEntry")
-            .def_readwrite("op", &ProfileEntry::op)
-            .def_readwrite("host", &ProfileEntry::host)
-            .def_readwrite("device_list", &ProfileEntry::device_list)
-            .def_readwrite("inputs", &ProfileEntry::inputs)
-            .def_readwrite("outputs", &ProfileEntry::outputs)
-            .def_readwrite("id", &ProfileEntry::id)
-            .def_readwrite("parent", &ProfileEntry::parent)
-            .def_readwrite("memory", &ProfileEntry::memory)
-            .def_readwrite("computation", &ProfileEntry::computation)
-            .def_property_readonly("param", [](ProfileEntry& self) -> std::string {
-                if (self.param) {
-                    return self.param->to_string();
-                } else {
-                    return {};
-                }
-            });
-
-    py::class_<mgb::imperative::Profiler>(m, "ProfilerImpl")
-            .def(py::init<>())
-            .def("start", &mgb::imperative::Profiler::start)
-            .def("stop", &mgb::imperative::Profiler::stop)
-            .def("clear", &mgb::imperative::Profiler::clear)
-            .def("dump", &mgb::imperative::Profiler::get_profile);
-
     using mgb::imperative::TensorSanityCheck;
     py::class_<TensorSanityCheck>(m, "TensorSanityCheckImpl")
             .def(py::init<>())
```
imperative/python/test/integration/test_profiler.py (new file, mode 100644)

```python
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
import json
import os

import pytest

from megengine import Parameter, tensor
from megengine.core import option
from megengine.module import Module
from megengine.utils.profiler import Profiler, scope


class Simple(Module):
    def __init__(self):
        super().__init__()
        self.a = Parameter([1.23], dtype="float32")

    def forward(self, x):
        x = x * self.a
        return x


def test_profiler():
    profile_prefix = "pytest_profile"
    profile_format = "chrome_timeline.json"
    profile_path = "{}.{}".format(profile_prefix, profile_format)
    with Profiler(profile_prefix, format=profile_format):
        with scope("my_scope"):
            oup = Simple()(tensor([1.23], dtype="float32"))
    with open(profile_path, "r") as f:
        events = json.load(f)
    os.remove(profile_path)
    prev_ts = {}
    scope_count = 0
    for event in events:
        if "dur" in event:
            assert event["dur"] >= 0
        elif "ts" in event and "tid" in event:
            ts = event["ts"]
            tid = event["tid"]
            if ts == 0:
                continue
            assert (tid not in prev_ts) or prev_ts[tid] <= ts
            prev_ts[tid] = ts
        if "name" in event and event["name"] == "my_scope":
            scope_count += 1
    assert scope_count > 0 and scope_count % 2 == 0
```
imperative/src/impl/function_hook.h

```diff
@@ -17,52 +17,37 @@ namespace mgb {
 namespace imperative {
 
 template <typename TFunction>
-class FunctionHooker;
+class FunctionHook;
 
-template <typename TRet, typename... TArgs>
-class FunctionHooker<TRet(TArgs...)> {
+template <template <typename> class TFunction, typename TRet, typename... TArgs>
+class FunctionHook<TFunction<TRet(TArgs...)>> {
 public:
-    using FunctionType = thin_function<TRet(TArgs...)>;
-    //Type of hooks. Hook should accept a real function as argument
-    //and invoke it on an appropriate time
-    using HookType = thin_function<TRet(FunctionType, TArgs...)>;
-    explicit FunctionHooker(FunctionType* fptr) : m_fptr{fptr} {
-        m_backup = {nullptr, [](FunctionType*){}};
+    using FunctionType = TFunction<TRet(TArgs...)>;
+    explicit FunctionHook(FunctionType* fptr) : m_fptr{fptr} {
+        m_backup = *fptr;
     }
 
 public:
-    FunctionHooker& apply_hook(HookType&& hook) {
-        if (!m_backup) {
-            FunctionType* backup = new FunctionType(*m_fptr);
-            //Restore hooked function, would be invoked when destructed
-            std::function<void(FunctionType*)> restorer = [fptr = m_fptr](FunctionType* bkp) -> void {
-                *fptr = *bkp;
-                delete bkp;
-            };
-            m_backup = decltype(m_backup)(backup, restorer);
-        }
+    template <typename THook,
+              typename = std::enable_if_t<std::is_invocable_r_v<TRet, THook, FunctionType, TArgs...>, void>>
+    FunctionHook& apply_hook(THook&& hook) {
         //Replace with hooked version
-        *m_fptr = [func = *m_fptr, hook](TArgs... args) -> TRet {
+        *m_fptr = [func = *m_fptr, hook = std::forward<THook>(hook)](TArgs... args) -> TRet {
             return hook(func, std::forward<TArgs>(args)...);
         };
         //Convinent for chain call
         return *this;
     }
 
 private:
     FunctionType* m_fptr;
-    std::unique_ptr<FunctionType, std::function<void(FunctionType*)>> m_backup;
+    FunctionType m_backup;
+
+public:
+    ~FunctionHook() {
+        *m_fptr = std::move(m_backup);
+    }
 };
 
-//Helps to deduce template args
-template <typename TRet, typename... TArgs>
-FunctionHooker(thin_function<TRet(TArgs...)>* f)
-    -> FunctionHooker<TRet(TArgs...)>;
-
-template <typename TSignature>
-auto make_shared_hook(thin_function<TSignature>* fptr){
-    return std::make_shared<FunctionHooker<TSignature>>(fptr);
+template <typename TFunction>
+auto make_shared_hook(TFunction* fptr){
+    return std::make_shared<FunctionHook<TFunction>>(fptr);
 }
 
 }  // namespace imperative
```
imperative/src/impl/interpreter/commands.h (new file, mode 100644)

```cpp
/**
 * \file imperative/src/impl/interpreter/commands.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include <string>
#include <variant>

#include "megbrain/tensor.h"
#include "megbrain/imperative/op_def.h"
#include "megbrain/imperative/utils/to_string.h"

namespace mgb::imperative {

namespace interpreter::intl {

struct TensorInfo;

class InterpreterProfiler;

struct Put {
    TensorInfo* dest;
    HostTensorND value;
    bool no_cache = false;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const {
        functor("dest", dest);
        functor("no_cache", no_cache);
        //functor("value", value);
    }

    const char* get_name() const { return "Put"; }
};

struct ApplyOp {
    std::shared_ptr<OpDef> op;
    SmallVector<TensorInfo*> inputs;
    SmallVector<TensorInfo*> outputs;
    SmallVector<TensorInfo*> dels;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const {
        functor("op", op);
        functor("inputs", inputs);
        functor("outputs", outputs);
        functor("dels", dels);
    }

    const char* get_name() const { return "ApplyOp"; }
};

struct Del {
    TensorInfo* dest;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("dest", dest); }

    const char* get_name() const { return "Del"; }
};

struct GetValue {
    TensorInfo* dest;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("dest", dest); }

    const char* get_name() const { return "GetValue"; }
};

struct SwapIn {
    TensorInfo* dest;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("dest", dest); }

    const char* get_name() const { return "SwapIn"; }
};

struct SwapOut {
    TensorInfo* dest;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("dest", dest); }

    const char* get_name() const { return "SwapOut"; }
};

struct Drop {
    TensorInfo* dest;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("dest", dest); }

    const char* get_name() const { return "Drop"; }
};

struct SetOption {
    std::string key;
    int value;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const {
        functor("key", key);
        functor("value", value);
    }

    const char* get_name() const { return "SetOption"; }
};

struct StartProfile {
    InterpreterProfiler* profiler;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const {}

    const char* get_name() const { return "StartProfile"; }
};

struct StopProfile {
    std::string basename;
    std::string format;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const {
        functor("basename", basename);
        functor("format", format);
    }

    const char* get_name() const { return "StopProfile"; }
};

struct PushScope {
    std::string scope_name;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("scope_name", scope_name); }

    const char* get_name() const { return "PushScope"; }
};

struct PopScope {
    std::string scope_name;

    template <typename TFunctor>
    void get_props(TFunctor&& functor) const { functor("scope_name", scope_name); }

    const char* get_name() const { return "PopScope"; }
};

using Command = std::variant<Put,
                             ApplyOp,
                             Del,
                             GetValue,
                             SwapIn,
                             SwapOut,
                             Drop,
                             SetOption,
                             StartProfile,
                             StopProfile,
                             PushScope,
                             PopScope>;

using IdentifiedCommand = std::pair<uint64_t, Command>;

}

template <>
struct ToStringTrait<interpreter::intl::Command> {
    std::string operator()(const interpreter::intl::Command& cmd) const {
        return std::visit([](auto& cmd){
            std::string result = cmd.get_name();
            result += "{";
            cmd.get_props([&](const char* key, auto&& value) {
                result += key;
                result += ": ";
                result += to_string(value);
                result += ",";
            });
            result += "}";
            return result;
        }, cmd);
    }
};

}
```
imperative/src/impl/interpreter/events.h (new file, mode 100644)

```cpp
/**
 * \file imperative/src/impl/interpreter/events.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include "./commands.h"
#include "./tensor_info.h"

namespace mgb::imperative::interpreter::intl {

struct CommandEvent {
    IdentifiedCommand icmd;
};

struct CommandEnqueueEvent: CommandEvent {};
struct CommandExecuteEvent: CommandEvent {};
struct CommandFinishEvent: CommandEvent {};

struct OpEvent {
    uint64_t id;
    std::shared_ptr<OpDef> op;
    SmallVector<uint64_t> inputs;
    SmallVector<uint64_t> outputs;
};

struct HostOpExecuteEvent: OpEvent {};
struct DeviceOpExecuteEvent: OpEvent {};
struct HostOpFinishEvent: OpEvent {};
struct DeviceOpFinishEvent: OpEvent {};

struct TensorDeclareEvent {
    uint64_t tensor_id;
};

struct TensorProduceEvent {
    uint64_t tensor_id;
    TensorLayout layout;
    CompNode device;
};

struct TensorEraseEvent {
    uint64_t tensor_id;
};

struct TensorPropEvent {
    uint64_t tensor_id;
    TensorInfo::Prop prop;
    std::string prop_desc;
};

struct TensorGetPropEvent: TensorPropEvent {};
struct TensorWaitPropEvent: TensorPropEvent {};
struct TensorNotifyPropEvent: TensorPropEvent {};
struct TensorWaitPropFinishEvent: TensorPropEvent {};

struct SyncStartEvent {};
struct SyncFinishEvent {};

struct ScopeEvent {
    std::string name;
};

struct ChannelBeginScope: ScopeEvent {};
struct ChannelEndScope: ScopeEvent {};
struct WorkerBeginScope: ScopeEvent {};
struct WorkerEndScope: ScopeEvent {};
struct DeviceBeginScope: ScopeEvent {};
struct DeviceEndScope: ScopeEvent {};

}
```
imperative/src/impl/interpreter_impl.cpp → imperative/src/impl/interpreter/interpreter_impl.cpp
(diff collapsed in the original view; contents not shown)
imperative/src/impl/interpreter_impl.h → imperative/src/impl/interpreter/interpreter_impl.h

```diff
 /**
- * \file imperative/src/impl/interpreter_impl.h
+ * \file imperative/src/impl/interpreter/interpreter_impl.h
  * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  *
  * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -9,14 +9,24 @@
  * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */
 
+#pragma once
+
 #include <deque>
 #include <future>
 #include <list>
+#include <thread>
 #include <unordered_set>
 #include <variant>
 
 #include "megbrain/utils/mempool.h"
 #include "megbrain/imperative/interpreter.h"
+#include "megbrain/imperative/profiler.h"
+
+#include "./commands.h"
+#include "./events.h"
+#include "./tensor_info.h"
+#include "./option_manager.h"
+#include "./profiler.h"
 
 namespace mgb::imperative::interpreter::intl {
@@ -26,188 +36,9 @@ struct InterpreterImpl : Interpreter {
     std::unique_ptr<Channel> create_channel() override;
 };
 
-enum EvictType {
-    NONE = 0,
-    SWAP = 1,
-    DROP = 2,
-};
-
-struct TensorInfo;
-using TensorInfoPtr = std::shared_ptr<TensorInfo>;
-
-struct TensorInfo {
-    TensorPtr ptr;
-    LogicalTensorDesc desc;
-    // FIXME: broken by drop
-    bool value_fetched = false;
-    bool invalid = false;
-    EvictType evict_type = NONE;
-
-    HostTensorND h_value;
-
-    // reserved for auto drop
-    size_t pinned = 0;
-    size_t recompute_times = 0;
-
-    struct ComputePath {
-        std::shared_ptr<OpDef> op;
-        SmallVector<TensorInfo*> inputs;
-        SmallVector<TensorInfo*> unique_inputs;
-        SmallVector<TensorInfo*> outputs;
-
-        size_t ref_cnt() {
-            return outputs.size() - std::count(outputs.begin(), outputs.end(), nullptr);
-        }
-
-        static ComputePath* make(std::shared_ptr<OpDef> op, SmallVector<TensorInfo*> inputs, SmallVector<TensorInfo*> outputs) {
-            auto* path = new TensorInfo::ComputePath();
-            path->op = op;
-            path->inputs = inputs;
-            path->outputs = outputs;
-            // dedup
-            SmallVector<TensorInfo*> unique_inputs = inputs;
-            std::sort(unique_inputs.begin(), unique_inputs.end());
-            unique_inputs.erase(std::unique(unique_inputs.begin(), unique_inputs.end()), unique_inputs.end());
-            path->unique_inputs = unique_inputs;
-            // attach users
-            for (auto input: unique_inputs) {
-                input->users.push_back(path);
-            }
-            // attach producer
-            for (auto output: outputs) {
-                output->producer = path;
-            }
-            return path;
-        }
-    }* producer = nullptr;
-
-    void pin() { ++pinned; }
-    void unpin() { --pinned; }
-
-    void detach_producer() {
-        if (!producer) {
-            return;
-        }
-        auto output = std::find(producer->outputs.begin(), producer->outputs.end(), this);
-        mgb_assert(output != producer->outputs.end());
-        *output = nullptr;
-        if (producer->ref_cnt() == 0) {
-            for (auto* input: producer->unique_inputs) {
-                input->users.erase(std::find(input->users.begin(), input->users.end(), producer));
-            }
-            delete producer;
-        }
-        producer = nullptr;
-    }
-
-    SmallVector<ComputePath*> users;
-};
-
-struct Put {
-    TensorInfo* dest;
-    HostTensorND value;
-    bool no_cache = false;
-
-    std::string to_string() const { return ssprintf("Command: Put %p", dest); }
-};
-struct ApplyOp {
-    std::shared_ptr<OpDef> op;
-    SmallVector<TensorInfo*> inputs;
-    SmallVector<TensorInfo*> outputs;
-    SmallVector<TensorInfo*> dels;
-
-    std::string to_string() const {
-        std::string builder{"Command: ApplyOp {"};
-        builder += "inputs [";
-        for (auto* input : inputs) { builder += ssprintf("%p, ", input); }
-        builder += "], outputs [";
-        for (auto* output : outputs) { builder += ssprintf("%p, ", output); }
-        builder += "], dels [";
-        for (auto* del : dels) { builder += ssprintf("%p, ", del); }
-        builder += "]";
-        return builder;
-    }
-};
...
(the removed Del, GetValue, SwapIn, SwapOut and Drop structs each hold a single
TensorInfo* dest and a to_string() helper; their replacements live in commands.h)
...
-struct Move {
-    TensorInfo* src;
-    TensorInfo* dest;
-
-    std::string to_string() const {
-        return ssprintf("Command: Move %s to %s",
-                        src->desc.layout.to_string().c_str(),
-                        dest->desc.layout.to_string().c_str());
-    }
-};
-struct Flush {
-    TensorInfo* dest = nullptr;
-
-    std::string to_string() const { return ssprintf("Command: Flush %p", dest); }
-};
-struct Nop {
-    std::string to_string() const { return "Command: Nop"; }
-};
-
-using Command = std::variant<Put,
-                             ApplyOp,
-                             Del,
-                             GetValue,
-                             SwapIn,
-                             SwapOut,
-                             Drop,
-                             Move,
-                             Flush,
-                             Nop>;
-
 struct ChannelImpl : Interpreter::Channel {
-    ChannelImpl() : m_worker(this), m_buffer(this) {}
+    ChannelImpl();
     ~ChannelImpl() override;
 
     Handle put(const HostTensorND& value, bool no_cache) override;
@@ -231,19 +62,21 @@ struct ChannelImpl : Interpreter::Channel {
     void sync() override;
     void close() override;
 
-    void set_swap_flag(bool) override;
-    void set_drop_flag(bool) override;
-    void set_buffer_length(int) override;
-    void config_async_level(int level) override;
-    int get_async_level() override;
+    int get_option(std::string name) override;
+    void set_option(std::string name, int value) override;
+
+    void start_profile(std::unordered_map<std::string, int> option) override;
+    void stop_profile(std::string basename, std::string format) override;
+
+    void push_scope(std::string) override;
+    void pop_scope(std::string) override;
 
 private:
     TensorInfo* alloc();
     void free(TensorInfo*);
     void detach_users(TensorInfo*);
 
-    void process_one_task(Command&);
+    void process_one_task(IdentifiedCommand&);
 
     void check_worker_exc_unsafe();
@@ -265,27 +98,38 @@ private:
             const SmallVector<LogicalTensorDesc>& input_descs,
             SmallVector<Handle>* outputs);
 
+    void assert_in_channel();
+    void assert_in_worker();
+
+    void sync_device_scope(CompNode device);
+
+    template <typename TCommand>
+    void enqueue_command(TCommand&& cmd) {
+        m_buffer.enqueue(Command{std::forward<TCommand>(cmd)});
+    }
+
     std::mutex m_mutex;
     std::condition_variable m_cv;
     MemPool<TensorInfo> m_pool;
     std::unordered_set<Handle> m_valid_handle;
     TensorInfo* m_waitee = nullptr;
     std::exception_ptr m_worker_exc;
-    size_t m_enable_evict = 0;
+    std::atomic_uint64_t m_last_id = 0;
 
-    struct WorkQueue : AsyncQueueSC<Command, WorkQueue> {
+    struct WorkQueue : AsyncQueueSC<IdentifiedCommand, WorkQueue> {
         // set max_spin=0 to prevent Queue fetch task in busy wait manner.
         // this won't affect throughput when python interpreter is sending enough task,
         // but will significantly save CPU time when waiting for task, e.g. wait for data input
         WorkQueue(ChannelImpl* owner)
-                : AsyncQueueSC<Command, WorkQueue>(0), m_owner(owner) {
+                : AsyncQueueSC<IdentifiedCommand, WorkQueue>(0), m_owner(owner) {
             sys::set_thread_name("interpreter");
         }
-        void process_one_task(Command& cmd) {
-            m_owner->process_one_task(cmd);
+        void process_one_task(IdentifiedCommand& icmd) {
+            m_owner->process_one_task(icmd);
         }
         void on_async_queue_worker_thread_start() override {
             sys::set_thread_name("worker");
+            m_owner->m_worker_state.tid = std::this_thread::get_id();
         }
     private:
         ChannelImpl* m_owner;
@@ -304,24 +148,14 @@ private:
      * Then the fused Apply may be invoked inplace. see: ChannelImpl::process_one_task
      */
     struct CommandBuffer {
-        CommandBuffer(ChannelImpl* owner) : m_owner(owner) {
-            int capacity = 3;
-            if (const char* capacity_str = MGB_GETENV("MEGENGINE_COMMAND_BUFFER_LENGTH")) {
-                capacity = atoi(capacity_str);
-            }
-            set_capacity(capacity);
-        }
+        CommandBuffer(ChannelImpl* owner) : m_owner(owner) {}
         void enqueue(Command cmd);
         bool empty() const {
             return m_commands.empty();
         }
-        void set_capacity(int capacity) {
-            mgb_assert(capacity >= 0 && capacity < 100, "invalid command buffer length");
-            m_capacity = capacity;
-        }
+        void flush();
     private:
         ChannelImpl* m_owner;
-        size_t m_capacity;
         std::deque<Command> m_commands;
         using Handle = decltype(m_commands)::iterator;
@@ -346,6 +180,26 @@ private:
     //! level 0: both sync.
     int m_async_level = 2;
     int m_max_recompute_time = 1;
+
+    struct State {
+        std::thread::id tid;
+        OptionManager options;
+        std::vector<std::string> scopes;
+        std::unique_ptr<InterpreterProfiler> profiler;
+
+        State() {
+            profiler = std::make_unique<InterpreterProfiler>();
+        }
+    };
+
+    struct ChannelState: State {};
+
+    struct WorkerState: State {
+        CompNode::UnorderedMap<std::vector<std::string>> device_scope_map;
+    };
+
+    ChannelState m_channel_state;
+    WorkerState m_worker_state;
 };
 
 }  // namespace mgb::imperative::interpreter::intl
```
imperative/src/impl/interpreter/option_manager.h (new file, mode 100644)

```cpp
/**
 * \file imperative/src/impl/interpreter/option_manager.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include <string>
#include <unordered_map>

#include "megbrain/common.h"

namespace mgb::imperative::interpreter::intl {

struct OptionManager {
private:
    std::unordered_map<std::string, int*> m_option_map = {};
public:
#define DEF_OPTION(name, env_key, default_value, desc) \
        int name = (m_option_map[#name]=&name, get_option_from_env(env_key, default_value));

    DEF_OPTION(async_level,            "MEGENGINE_INTERP_ASYNC_LEVEL",    2,
               "config whether raise error exactly when invoking op.\n"
               "level 2: both device and user side errors are async;\n"
               "level 1: user side errors are sync;\n"
               "level 0: both sync.");
    DEF_OPTION(enable_swap,            "MEGENGINE_ENABLE_SWAP",           0, "");
    DEF_OPTION(enable_drop,            "MEGENGINE_ENABLE_DROP",           0, "");
    DEF_OPTION(max_recompute_time,     "MEGENGINE_MAX_RECOMP_TIME",       1, "");
    DEF_OPTION(catch_worker_execption, "MEGENGINE_CATCH_WORKER_EXEC",     1,
               "catch worker exception if enabled, close it when debugging");
    DEF_OPTION(buffer_length,          "MEGENGINE_COMMAND_BUFFER_LENGTH", 3,
               "set command buffer length.");
    DEF_OPTION(enable_host_compute,    "MEGENGINE_HOST_COMPUTE",          1,
               "enable host compute, thus computation may be done in host event if it's device is gpu.");

#undef DEF_OPTION

    void set_option(const std::string& name, int value) {
        *m_option_map[name] = value;
    }

    int get_option(const std::string& name) const {
        return *m_option_map.at(name);
    }

    static int get_option_from_env(const std::string& name, int default_value) {
        if (const char* env_val = MGB_GETENV(name.c_str())) {
            default_value = std::atoi(env_val);
        }
        return default_value;
    }
};

}
```
imperative/src/impl/interpreter/profiler.cpp (new file, mode 100644)

```cpp
/**
 * \file imperative/src/impl/interpreter/profiler.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "./profiler.h"

#include <sstream>
#include <cinttypes>

#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
#elif defined(_WIN32)
#include <process.h>
#else
#error Unsupported platform
#endif

#include "../op_trait.h"

namespace mgb::imperative::interpreter::intl {

namespace {

struct InterpreterProfilerDumpChromeTimelineContext {
    // either host_thread(std::thread::id) or device_thread(CompNode)
    using Thread = std::variant<std::thread::id, CompNode>;

    // input params
    std::string base_name;
    std::string format;
    InterpreterProfiler::Data profile_data;
    InterpreterProfiler::Option option;
    std::function<std::string(std::thread::id)> host_map;
    // internal states
    decltype(getpid()) pid;
    CompNode::UnorderedMap<std::map<double, CompNode::Event*>> device_sync_map;
    SmallVector<Thread> thread_list;
    double time_start;
    // options
    bool show_operator_name;
    // results
    ChromeTraceEventList event_list;

    InterpreterProfilerDumpChromeTimelineContext(
            std::string base_name,
            std::string format,
            InterpreterProfiler::Data profile_data,
            InterpreterProfiler::Option option,
            std::function<std::string(std::thread::id)> host_map)
            : base_name{base_name}, format{format}, profile_data{profile_data},
              option{option}, host_map{host_map} {
        pid = getpid();
        time_start = option.align_time ? time_start : 0;
        show_operator_name = option.show_operator_name;
    }

    // get device time from event
    double get_device_time(CompNode::Event* device_event, double host_time) {
        device_event->host_wait();
        auto& sync_map = device_sync_map[device_event->comp_node()];
        // find sync point
        auto iter = sync_map.begin();
        auto sync_current = [&] {
            iter = sync_map.insert(iter, {host_time, device_event});
            return host_time;
        };
        if (iter == sync_map.end()) {
            // not found, insert sync
            return sync_current();
        }
        auto& [base_time, base] = *iter;
        // calculate elapsed time
        double delta_time = base->elapsed_time_until(*device_event) * 1e3;
        return base_time + delta_time;
    };

    template <typename T>
    size_t get_tid(T t) {
        for (size_t i = 0; i < thread_list.size(); i++) {
            if (thread_list[i] == Thread{t}) {
                return i;
            }
        }
        thread_list.push_back(t);
        return thread_list.size() - 1;
    };

    ChromeTraceEvent& new_event(std::string name, char ph, uint64_t tid, double ts) {
        return event_list.new_event().name(name).ph(ph).tid(tid).ts(ts).pid(pid);
    };

    // convert Command to json object. Has to be an callable object
    static auto constexpr cmd_to_args = [](auto&& cmd) {
        auto args = json::Object::make();
        cmd.get_props([&](const char* key, auto&& value){
            (*args)[key] = json::String::make(to_string(value));
        });
        (*args)["__type__"] = json::String::make(typeid(cmd).name());
        return args;
    };

    void process() {
        // enumerate and process each record
        for (auto&& record : profile_data.records) {
            std::visit([this](auto& record){
                using TEvent = std::decay_t<decltype(record.data)>;
                Session<TEvent>(*this, record).process();
            }, record);
        }
        for (size_t tid = 0; tid < thread_list.size(); ++tid) {
            auto tname = std::visit([&](auto& host_or_device) -> std::string {
                using T = std::decay_t<decltype(host_or_device)>;
                if constexpr (std::is_same_v<T, std::thread::id>) {
                    // take name from host_map
                    return host_map(host_or_device);
                } else {
                    // use CompNode::to_string
                    return host_or_device.to_string();
                }
            }, thread_list[tid]);
            // assign thread name
            new_event("thread_name", 'M', tid, 0)
                .arg("name", tname);
        }
        // wraite output to file
        std::string out_buf;
        event_list.to_json()->writeto(out_buf, 4);
        std::ofstream output_stream;
        output_stream.open(base_name + "." + format);
        output_stream << out_buf;
        output_stream.flush();
        output_stream.close();
    }

    template <typename TEvent>
    struct Session {
        InterpreterProfilerDumpChromeTimelineContext& ctx;
        ProfilerBase::EventRecord<TEvent>& record;
        TEvent& data;

        Session(InterpreterProfilerDumpChromeTimelineContext& ctx,
                ProfilerBase::EventRecord<TEvent>& record)
                : ctx{ctx}, record{record}, data{record.data} {}

        uint64_t get_host_tid() {
            return ctx.get_tid(record.host().tid);
        };
        double get_host_ts() {
            return (ctx.time_start + record.host().time) * 1e3;
        };
        uint64_t get_device_tid() {
            return ctx.get_tid(record.device().event->comp_node());
        };
        double get_device_ts() {
            return (ctx.time_start + ctx.get_device_time(record.device().event.get(), record.device().after)) * 1e3;
        };
        ChromeTraceEvent& new_host_event(std::string name, char ph) {
            return ctx.new_event(std::move(name), ph, get_host_tid(), get_host_ts());
        };
        ChromeTraceEvent& new_device_event(std::string name, char ph) {
            return ctx.new_event(std::move(name), ph, get_device_tid(), get_device_ts());
        };

        void process() {
            // dispatch event by type
            if constexpr (std::is_same_v<TEvent, CommandEnqueueEvent>) {
                auto args = std::visit(cmd_to_args, data.icmd.second);
                new_host_event("CommandEnqueue", 'X').dur(0).args(args);
            } else if constexpr (std::is_same_v<TEvent, CommandExecuteEvent>) {
                auto args = std::visit(cmd_to_args, data.icmd.second);
                new_host_event("CommandExecute", 'B').args(args);
            } else if constexpr (std::is_same_v<TEvent, CommandFinishEvent>) {
                new_host_event("CommandExecute", 'E');
            } else if constexpr (std::is_same_v<TEvent, HostOpExecuteEvent>) {
                auto args = json::Object::make();
                auto props = OpDef::props(*data.op);
                auto name = data.op->trait()->name;
                for (auto&& [prop_name, prop_val] : props) {
                    (*args)[std::string("op.") + prop_name] = json::String::make(prop_val);
                }
                (*args)["name"] = json::String::make(name);
                (*args)["id"] = json::Number::make(data.id);
                (*args)["inputs"] = json::String::make(to_string(data.inputs));
                (*args)["outputs"] = json::String::make(to_string(data.outputs));
                new_host_event(ctx.show_operator_name ? name : "OpExecute", 'B').args(args);
            } else if constexpr (std::is_same_v<TEvent, DeviceOpExecuteEvent>) {
                // builds the same args object as the HostOpExecuteEvent branch above,
                // then emits it on the device track:
                // new_device_event(ctx.show_operator_name ? name : "OpExecute", 'B').args(args);
            } else if constexpr (std::is_same_v<TEvent, HostOpFinishEvent>) {
                auto name = data.op->trait()->name;
                new_host_event(ctx.show_operator_name ? name : "OpExecute", 'E');
            } else if constexpr (std::is_same_v<TEvent, DeviceOpFinishEvent>) {
                auto name = data.op->trait()->name;
                new_device_event(ctx.show_operator_name ? name : "OpExecute", 'E');
            } else if constexpr (std::is_same_v<TEvent, TensorDeclareEvent>) {
                json::Number::make(data.tensor_id);
                new_host_event("TensorLifetime", 'N').id(data.tensor_id);
            } else if constexpr (std::is_same_v<TEvent, TensorProduceEvent>) {
                auto snapshot = json::Object::make();
                (*snapshot)["shape"] = json::String::make(to_string((TensorShape)data.layout));
                (*snapshot)["dtype"] = json::String::make(to_string(data.layout.dtype));
                (*snapshot)["device"] = json::String::make(to_string(data.device));
                json::Number::make(data.tensor_id);
                new_host_event("TensorLifetime", 'O').id(data.tensor_id).arg("snapshot", snapshot);
            } else if constexpr (std::is_same_v<TEvent, TensorEraseEvent>) {
                json::Number::make(data.tensor_id);
                new_host_event("TensorLifetime", 'D').id(data.tensor_id);
            } else if constexpr (std::is_same_v<TEvent, TensorGetPropEvent>) {
                auto args = json::Object::make();
                (*args)["id"] = json::Number::make(data.tensor_id);
                (*args)["prop"] = json::String::make(to_string(data.prop));
                (*args)["prop_desc"] = json::String::make(data.prop_desc);
                new_host_event("TensorGetProp", 'X').dur(0).args(args);
            } else if constexpr (std::is_same_v<TEvent, TensorNotifyPropEvent>) {
                // TODO
            } else if constexpr (std::is_same_v<TEvent, TensorWaitPropEvent>) {
                // same args (id, prop, prop_desc) as the TensorGetPropEvent branch
                // new_host_event("TensorWaitProp", 'B').args(args);
            } else if constexpr (std::is_same_v<TEvent, TensorWaitPropFinishEvent>) {
                // same args (id, prop, prop_desc) as the TensorGetPropEvent branch
                // new_host_event("TensorWaitProp", 'E').args(args);
            } else if constexpr (std::is_same_v<TEvent, SyncStartEvent>) {
                new_host_event("SyncEvent", 'B');
            } else if constexpr (std::is_same_v<TEvent, SyncFinishEvent>) {
                new_host_event("SyncEvent", 'E');
            } else if constexpr (std::is_same_v<TEvent, ChannelBeginScope>) {
                new_host_event(data.name, 'B');
            } else if constexpr (std::is_same_v<TEvent, ChannelEndScope>) {
                new_host_event(data.name, 'E');
            } else if constexpr (std::is_same_v<TEvent, WorkerBeginScope>) {
                new_host_event(data.name, 'B');
            } else if constexpr (std::is_same_v<TEvent, WorkerEndScope>) {
                new_host_event(data.name, 'E');
            } else if constexpr (std::is_same_v<TEvent, DeviceBeginScope>) {
                new_device_event(data.name, 'B');
            } else if constexpr (std::is_same_v<TEvent, DeviceEndScope>) {
                new_device_event(data.name, 'E');
            } else {
                static_assert(!std::is_same_v<TEvent, TEvent>);
            }
        }
    };
};

}

void InterpreterProfiler::dump_data(
        std::string basename,
        std::string format,
        InterpreterProfiler::Data profile_data,
        const InterpreterProfiler::Option& option,
        std::function<std::string(std::thread::id)> host_map) {
    InterpreterProfilerDumpChromeTimelineContext{
        basename, format, profile_data, option, host_map
    }.process();
}

}
```

(Note: two repetitive branches above, DeviceOpExecuteEvent and the TensorWaitProp pair, are abbreviated with comments; their full bodies in the commit mirror the HostOpExecuteEvent and TensorGetPropEvent branches respectively.)
imperative/src/impl/interpreter/profiler.h
0 → 100644
/**
 * \file imperative/src/impl/interpreter/profiler.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include "megbrain/imperative/profiler.h"

#include "./commands.h"
#include "./events.h"
#include "./option_manager.h"

namespace mgb::imperative::interpreter::intl {

class InterpreterProfiler: public Profiler<
        CommandEnqueueEvent, CommandExecuteEvent, CommandFinishEvent,
        HostOpExecuteEvent, HostOpFinishEvent,
        DeviceOpExecuteEvent, DeviceOpFinishEvent,
        TensorDeclareEvent, TensorProduceEvent, TensorEraseEvent,
        TensorGetPropEvent, TensorWaitPropEvent, TensorNotifyPropEvent, TensorWaitPropFinishEvent,
        SyncStartEvent, SyncFinishEvent,
        ChannelBeginScope, ChannelEndScope,
        WorkerBeginScope, WorkerEndScope,
        DeviceBeginScope, DeviceEndScope> {
    /* 22 events now. An enum code may be a better solution */
public:
    enum Topic {
        Command        = 0b000001,
        Operator       = 0b000010,
        TensorLifetime = 0b000100,
        TensorProp     = 0b001000,
        Sync           = 0b010000,
        Scope          = 0b100000,
    };

    struct Option {
        Topic topic;
        bool align_time;
        bool show_operator_name;

        static Option from_dict(std::unordered_map<std::string, int> dict) {
            Option option;
            option.topic = Topic(dict.at("topic"));
            option.align_time = bool(dict.at("align_time"));
            option.show_operator_name = bool(dict.at("show_operator_name"));
            return option;
        }
    };

    Option get_option() const {
        return m_option;
    }

    void set_option(const Option& option) {
        m_option = option;
    }

    static void dump_data(std::string basename, std::string format,
                          InterpreterProfiler::Data profile_data,
                          const Option& option,
                          std::function<std::string(std::thread::id)> host_map);

    static Mask topic_to_mask(Topic topic) {
        Mask result;
        if (topic & Command) {
            result |= mask_of<CommandEnqueueEvent, CommandExecuteEvent, CommandFinishEvent>();
        }
        if (topic & Operator) {
            result |= mask_of<HostOpExecuteEvent, HostOpFinishEvent>();
            result |= mask_of<DeviceOpExecuteEvent, DeviceOpFinishEvent>();
        }
        if (topic & TensorLifetime) {
            result |= mask_of<TensorDeclareEvent, TensorProduceEvent, TensorEraseEvent>();
        }
        if (topic & TensorProp) {
            result |= mask_of<TensorGetPropEvent, TensorWaitPropEvent, TensorNotifyPropEvent, TensorWaitPropFinishEvent>();
        }
        if (topic & Sync) {
            result |= mask_of<SyncStartEvent, SyncFinishEvent>();
        }
        if (topic & Scope) {
            result |= mask_of<ChannelBeginScope, ChannelEndScope, WorkerBeginScope, WorkerEndScope>();
            result |= mask_of<DeviceBeginScope, DeviceEndScope>();
        }
        return result;
    }

private:
    Option m_option;
};

}
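For context, a minimal usage sketch (not part of the diff) of how the Topic bits above are combined and turned into the per-event mask consumed by the base Profiler. The option keys mirror Option::from_dict; the wrapping function is only illustrative.

    // Hypothetical sketch: build an Option and derive the event mask.
    using InterpreterProfiler = mgb::imperative::interpreter::intl::InterpreterProfiler;

    void configure_profiler(InterpreterProfiler& profiler) {
        // "topic" is a bit-or of Topic values, exactly what from_dict() expects.
        auto option = InterpreterProfiler::Option::from_dict({
            {"topic", InterpreterProfiler::Command | InterpreterProfiler::Operator},
            {"align_time", 1},
            {"show_operator_name", 1},
        });
        profiler.set_option(option);
        // topic_to_mask() expands the coarse topic bits into the bitset of
        // individual event types that Profiler::start(Mask) will record.
        profiler.start(InterpreterProfiler::topic_to_mask(option.topic));
    }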
imperative/src/impl/interpreter/tensor_info.h
0 → 100644
/**
 * \file imperative/src/impl/interpreter/tensor_info.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include "megbrain/imperative/physical_tensor.h"
#include "megbrain/imperative/op_def.h"
#include "megbrain/imperative/utils/to_string.h"

namespace mgb::imperative {

namespace interpreter::intl {

enum EvictType {
    NONE = 0,
    SWAP = 1,
    DROP = 2,
};

struct TensorInfo;
using TensorInfoPtr = std::shared_ptr<TensorInfo>;

struct TensorInfo {
    enum Prop {
        Device, Shape, DType, DevValue, HostValue
    };

    uint64_t id;
    TensorPtr ptr;
    LogicalTensorDesc desc;

    // FIXME: broken by drop
    bool value_fetched = false;
    bool invalid = false;
    bool allow_delete = false;

    EvictType evict_type = NONE;

    HostTensorND h_value;

    // reserved for auto drop
    size_t pinned = 0;
    size_t recompute_times = 0;

    struct ComputePath {
        std::shared_ptr<OpDef> op;
        SmallVector<TensorInfo*> inputs;
        SmallVector<TensorInfo*> unique_inputs;
        SmallVector<TensorInfo*> outputs;

        size_t ref_cnt() {
            return outputs.size() - std::count(outputs.begin(), outputs.end(), nullptr);
        }

        static ComputePath* make(std::shared_ptr<OpDef> op, SmallVector<TensorInfo*> inputs, SmallVector<TensorInfo*> outputs) {
            auto* path = new TensorInfo::ComputePath();
            path->op = op;
            path->inputs = inputs;
            path->outputs = outputs;
            // dedup
            SmallVector<TensorInfo*> unique_inputs = inputs;
            std::sort(unique_inputs.begin(), unique_inputs.end());
            unique_inputs.erase(std::unique(unique_inputs.begin(), unique_inputs.end()), unique_inputs.end());
            path->unique_inputs = unique_inputs;
            // attach users
            for (auto input: unique_inputs) {
                input->users.push_back(path);
            }
            // attach producer
            for (auto output: outputs) {
                output->producer = path;
            }
            return path;
        }
    }* producer = nullptr;

    void pin() {
        ++pinned;
    }

    void unpin() {
        --pinned;
    }

    void detach_producer() {
        if (!producer) {
            return;
        }
        auto output = std::find(producer->outputs.begin(), producer->outputs.end(), this);
        mgb_assert(output != producer->outputs.end());
        *output = nullptr;
        if (producer->ref_cnt() == 0) {
            for (auto* input: producer->unique_inputs) {
                input->users.erase(std::find(input->users.begin(), input->users.end(), producer));
            }
            delete producer;
        }
        producer = nullptr;
    }

    SmallVector<ComputePath*> users;
};

}

template <>
struct ToStringTrait<interpreter::intl::TensorInfo::Prop> {
    using TensorInfo = interpreter::intl::TensorInfo;

    std::string operator()(TensorInfo::Prop prop) const {
        switch (prop) {
        case TensorInfo::DType:
            return "dtype";
        case TensorInfo::DevValue:
            return "dev_value";
        case TensorInfo::Device:
            return "device";
        case TensorInfo::HostValue:
            return "host_value";
        case TensorInfo::Shape:
            return "shape";
        default:
            return "unknown";
        }
    }
};

}
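A note on the bookkeeping above (an illustrative sketch, not part of the diff): ComputePath::make deduplicates its input list with the usual sort-and-unique idiom before wiring user/producer edges, so an operator that reads the same tensor twice registers only one user edge. The standalone snippet below shows the same idiom on plain pointers.

    #include <algorithm>
    #include <vector>

    // Same dedup idiom as ComputePath::make, applied to an arbitrary pointer list.
    template <typename T>
    std::vector<T*> dedup(std::vector<T*> inputs) {
        std::sort(inputs.begin(), inputs.end());
        inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end());
        return inputs;
    }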
imperative/src/impl/op_def.cpp
@@ -70,6 +70,26 @@ BackwardGraphResult OpDef::make_backward_graph(
     return def.trait()->make_backward_graph(def, inputs, input_requires_grad, output_has_grad);
 }
 
+std::vector<std::pair<const char*, std::string>> OpDef::props(const OpDef& def) {
+    return def.trait()->props(def);
+}
+
+const char* OpDef::name() const {
+    return trait()->name;
+}
+
+std::string OpDef::to_string() const {
+    std::string builder = "{";
+    for (auto&& [name, value]: props(*this)) {
+        builder += name;
+        builder += ": ";
+        builder += value;
+        builder += ",";
+    }
+    return builder + "}";
+}
+
 size_t OpDef::hash() const {
     return trait()->hash(*this);
 }
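To make the new introspection hooks concrete, a hypothetical example (assuming an operator whose generated props() reports mode and axis attributes): OpDef::to_string() simply concatenates the name/value pairs returned by props(), so a description helper could look like the following.

    // Illustrative only: given any OpDef instance, to_string() joins the
    // property pairs produced by props() into a brace-delimited string.
    std::string describe(const mgb::imperative::OpDef& op) {
        return op.to_string();  // e.g. "{mode: SUM,axis: 1,}" for a hypothetical reduce op
    }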
imperative/src/impl/op_trait.h
@@ -72,6 +72,7 @@ using InferOutputAttrsFallible = detail::OpMeth<
     decltype(OpDef::infer_output_attrs_fallible)>;
 using GradMaker = detail::OpMeth<
     decltype(OpDef::make_backward_graph)>;
+using Props = detail::OpMeth<decltype(OpDef::props)>;
 using HashFunc = detail::OpMeth<size_t(const OpDef&)>;
 using IsSame = detail::OpMeth<bool(const OpDef&, const OpDef&)>;
@@ -84,6 +85,7 @@ struct OpTrait {
     ApplyOnVarNode apply_on_var_node;
     InferOutputAttrsFallible infer_output_attrs_fallible;
     GradMaker make_backward_graph;
+    Props props;
     HashFunc hash;
     IsSame is_same_st;
     OpTrait(const char* name);
@@ -100,6 +102,7 @@ struct OpTrait {
     cb(apply_on_var_node) \
     cb(infer_output_attrs_fallible) \
     cb(make_backward_graph) \
+    cb(props) \
     cb(hash) \
     cb(is_same_st)
imperative/src/impl/ops/backward_graph.cpp
@@ -148,9 +148,15 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_tensor_attrs(
         .graph().infer_attrs(inputs);
 }
 
+std::vector<std::pair<const char*, std::string>> props(const OpDef& backward_graph) {
+    return {};
+}
+
 OP_TRAIT_REG(BackwardGraph, BackwardGraph)
     .apply_on_physical_tensor(backward_impl)
     .infer_output_attrs_fallible(infer_tensor_attrs)
+    .props(props)
     .fallback();
 } // anonymous namespace
imperative/src/impl/ops/opr_attr.cpp
@@ -95,9 +95,14 @@ std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* opr) {
     return OprAttr::make(registry->name, std::move(ctx.m_param), opr->config());
 }
 
+std::vector<std::pair<const char*, std::string>> props(const OpDef& def) {
+    return {};
+}
+
 OP_TRAIT_REG(OprAttr, OprAttr)
     .make_from_op_node(make_from_op_node)
     .apply_on_var_node(apply_on_var_node)
+    .props(props)
     .fallback();
 } // anonymous namespace
imperative/src/impl/profiler.cpp
@@ -11,12 +11,14 @@
 #include "megbrain/imperative/profiler.h"
 
-#include "./function_hook.h"
+#include <chrono>
+
 #include "megbrain/imperative/ops/opr_attr.h"
 #include "megbrain/imperative/physical_tensor.h"
 #include "megbrain/plugin/opr_footprint.h"
 
+#include "./function_hook.h"
 #include "./event_pool.h"
 #include "./op_trait.h"
@@ -25,200 +27,42 @@ namespace imperative {
 namespace {
 
-CompNode::UnorderedSet collect_comp_nodes(
-        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
-    CompNode::UnorderedSet comp_nodes;
-    SmallVector<LogicalTensorDesc> inp_descs;
-    for (auto&& i: inputs) {
-        comp_nodes.insert(i->comp_node());
-        inp_descs.push_back({i->layout(), i->comp_node(), {}});
-    }
-    SmallVector<LogicalTensorDesc> oup_descs =
-            std::get<0>(def.infer_output_attrs_fallible(def, inp_descs));
-    for (auto&& output_attr: oup_descs) {
-        comp_nodes.insert(output_attr.comp_node);
-    }
-    return comp_nodes;
-}
-
 DeviceTimer::SharedEvent alloc_recorded_event(CompNode device) {
     auto event = EventPool::with_timer().alloc_shared(device);
     event->record();
     return event;
 }
 
-OprFootprint footprint{};
-
 } // namespace
 
-void DeviceTimer::reset(thin_function<double()> host_timer) {
-    CompNode::foreach([this, host_timer](CompNode device) {
-        m_base_event_table[device] = {alloc_recorded_event(device), host_timer()};
-    });
-    m_host_timer = host_timer;
-}
-
-thin_function<double()> DeviceTimer::get_device_time(CompNode device) {
-    auto event = EventPool::with_timer().alloc_shared(device);
-    event->record();
-    if (m_base_event_table.count(device) == 0) {
-        m_base_event_table[device] = {alloc_recorded_event(device), m_host_timer()};
-    }
-    auto base = m_base_event_table[device];
-    return [base, event] {
-        auto [base_event, host_time] = base;
-        // TODO: sync once for each compnode
-        event->host_wait();
-        return base_event->elapsed_time_until(*event) * 1000 + host_time;
-    };
-}
-
-void DeviceTimer::clear() {
-    m_base_event_table.clear();
-}
-
-size_t TensorRecorder::record_tensor(const TensorPtr& tensor) {
-    if (m_tensor_map.count(tensor.get()) > 0) {
-        auto& [prev, id] = m_tensor_map[tensor.get()];
-        if (prev.lock() != tensor) {
-            prev = tensor;
-            id = m_next_id++;
-        }
-        return id;
-    } else {
-        auto id = m_next_id++;
-        m_tensor_map.insert({tensor.get(), {std::weak_ptr<Tensor>{tensor}, id}});
-        return id;
-    }
-}
-
-void TensorRecorder::clear() {
-    m_next_id = 0;
-    m_tensor_map.clear();
-}
-
-Profile& Profiler::get_profile() {
-    for (auto& entry: m_profile) {
-        for (auto& [device, device_begin, device_end]: entry.device_list) {
-            MGB_MARK_USED_VAR(device);
-            device_begin = [value = device_begin()] { return value; };
-            device_end = [value = device_end()] { return value; };
-        }
-    }
-    return m_profile;
-}
-
-void Profiler::start(uint32_t flags) {
-    m_host_timer.reset();
-    m_device_timer.reset([&] { return m_host_timer.get_msecs(); });
-    OpTrait::for_each_trait([this, flags](OpTrait& trait) {
-        auto hook_apply_on_physical_tensor =
-                make_shared_hook(&trait.apply_on_physical_tensor);
-        auto hook_apply_on_var_node =
-                make_shared_hook(&trait.apply_on_var_node);
-        hook_apply_on_physical_tensor->apply_hook([this, flags]
-                (auto&& apply, const OpDef& def, SmallVector<TensorPtr> inputs) {
-            auto shape2vector = [](const TensorShape& shape) {
-                std::vector<size_t> vector_shape;
-                for (size_t i = 0; i < shape.ndim; i++) {
-                    vector_shape.push_back(shape[i]);
-                }
-                return vector_shape;
-            };
-            ProfileEntry entry;
-            entry.id = m_entry_count++;
-            // TODO: assign parent
-            entry.parent = 0;
-            // Record apply context and save to m_profile
-            entry.op = const_cast<OpDef&>(def).shared_from_this();
-            for (auto&& input: inputs) {
-                entry.inputs.push_back({m_tensor_recorder.record_tensor(input),
-                        shape2vector(input->layout()), input->comp_node()});
-            }
-            double host_begin = m_host_timer.get_msecs();
-            auto&& comp_nodes = collect_comp_nodes(def, inputs);
-            for (auto&& comp_node: comp_nodes) {
-                entry.device_list.push_back(
-                        {comp_node, m_device_timer.get_device_time(comp_node), {}});
-            }
-            if (flags & PROFILE_FOOTPRINT) {
-                MGB_LOCK_GUARD(m_lock);
-                m_entry_stack.push({&def, &entry, std::this_thread::get_id()});
-            }
-            // Do real apply
-            auto outputs = apply(def, inputs);
-            for (auto& [cn, dev_begin, dev_end]: entry.device_list) {
-                MGB_MARK_USED_VAR(cn);
-                MGB_MARK_USED_VAR(dev_begin);
-                dev_end = m_device_timer.get_device_time(cn);
-            }
-            entry.host = {host_begin, m_host_timer.get_msecs()};
-            for (auto&& output: outputs) {
-                entry.outputs.push_back({m_tensor_recorder.record_tensor(output),
-                        shape2vector(output->layout()), output->comp_node()});
-            }
-            if (flags & PROFILE_FOOTPRINT) {
-                mgb_assert(std::get<1>(m_entry_stack.top()) == &entry);
-                MGB_LOCK_GUARD(m_lock);
-                m_entry_stack.pop();
-            }
-            m_profile.push_back(std::move(entry));
-            return outputs;
-        });
-        if (flags & PROFILE_FOOTPRINT) {
-            hook_apply_on_var_node->apply_hook([this]
-                    (auto&& apply, const OpDef& def, VarNodeArray inputs) -> VarNodeArray {
-                auto vars = apply(def, std::move(inputs));
-                std::remove_reference_t<decltype(m_entry_stack.top())> top;
-                {
-                    MGB_LOCK_GUARD(m_lock);
-                    if (m_entry_stack.empty()) {
-                        return vars;
-                    }
-                    top = m_entry_stack.top();
-                }
-                auto [current_op, current_entry, thread_id] = top;
-                if (current_op != &def || thread_id != std::this_thread::get_id()) {
-                    return vars;
-                }
-                auto&& footprint_result = footprint.calc_footprint(vars[0]->owner_opr());
-                current_entry->memory = footprint_result.memory;
-                current_entry->computation = footprint_result.computation;
-#if MGB_ENABLE_JSON
-                current_entry->param = footprint_result.param;
-#endif
-                return vars;
-            });
-        }
-        m_hooker_list.push_back(std::move(hook_apply_on_physical_tensor));
-        m_hooker_list.push_back(std::move(hook_apply_on_var_node));
-    });
-}
-
-void Profiler::stop() {
-    m_hooker_list.clear();
-    for (auto& entry: m_profile) {
-        entry.wait_device();
-    }
-}
-
-void Profiler::clear() {
-    mgb_assert(m_entry_stack.empty(),
-            "entry_stack should be empty after profile");
-    mgb_assert(m_hooker_list.empty(), "hooks should be released");
-    m_profile.clear();
-    m_entry_count = 0;
-    m_device_timer.clear();
-    m_tensor_recorder.clear();
-}
+DeviceTimer::SharedEvent DeviceTimer::get_device_time(CompNode device) {
+    return alloc_recorded_event(device);
+}
+
+SmallVector<DeviceTimer::SharedEvent> DeviceTimer::get_all(SmallVector<CompNode> device_list) {
+    SmallVector<DeviceTimer::SharedEvent> results;
+    for (auto&& device: device_list) {
+        results.push_back(alloc_recorded_event(device));
+    }
+    return results;
+}
+
+double HostTimer::get_msecs() {
+    using namespace std::chrono;
+    auto finish = steady_clock::now();
+    auto duration = duration_cast<microseconds>(finish - m_start);
+    return (double)duration.count() / 1e3;
+}
+
+double HostTimer::get_started_at() {
+    return m_started_at;
+}
+
+void HostTimer::reset() {
+    using namespace std::chrono;
+    m_start = steady_clock::now();
+    auto now_us = duration_cast<microseconds>(std::chrono::system_clock::now().time_since_epoch());
+    m_started_at = (double)(now_us.count()) / 1e3;
+}
 
 } // namespace imperative
imperative/src/impl/proxy_graph/mini_graph.h
@@ -471,6 +471,7 @@ class ExecMiniGraph : public ProxyGraph::MiniGraph {
         }
         if (can_pop) {
             for (auto _ : comp_node_trackers) {
+                MGB_MARK_USED_VAR(_);
                 busy_oprs.pop_front();
             }
             m_opr = busy_oprs.front().opr;
imperative/src/include/megbrain/imperative/interpreter.h
@@ -10,6 +10,7 @@
  */
 
 #include <atomic>
+#include <any>
 
 #include "megbrain/imperative/op_def.h"
@@ -42,12 +43,15 @@ struct Interpreter {
     virtual void sync() = 0;
     virtual void close() = 0;
 
-    virtual void set_swap_flag(bool) = 0;
-    virtual void set_drop_flag(bool) = 0;
-    virtual void set_buffer_length(int) = 0;
-    virtual void config_async_level(int level) = 0;
-    virtual int get_async_level() = 0;
+    virtual int get_option(std::string name) = 0;
+    virtual void set_option(std::string name, int value) = 0;
+
+    virtual void start_profile(std::unordered_map<std::string, int> option) = 0;
+    virtual void stop_profile(std::string basename, std::string format) = 0;
+
+    virtual void push_scope(std::string name) = 0;
+    virtual void pop_scope(std::string name) = 0;
 };
 
 virtual std::unique_ptr<Channel> create_channel() = 0;
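The profiling entry points above are what the Python-side profiler utility ultimately drives. A hypothetical call sequence against a Channel is sketched below; the method names come from this header, but the option keys and the basename/format values are assumptions (modelled on InterpreterProfiler::Option::from_dict), not values confirmed by the diff.

    // Sketch only: `chan` is an Interpreter::Channel obtained from create_channel().
    void profile_region(mgb::imperative::interpreter::Interpreter::Channel* chan) {
        // keys assumed to match Option::from_dict: topic bits, align_time, show_operator_name
        chan->start_profile({{"topic", 0b111111}, {"align_time", 1}, {"show_operator_name", 1}});
        chan->push_scope("my_region");
        // ... enqueue some ops through the channel ...
        chan->pop_scope("my_region");
        chan->stop_profile("profile", "chrome_timeline.json");  // basename/format are illustrative
    }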
imperative/src/include/megbrain/imperative/op_def.h
@@ -13,6 +13,7 @@
 #include "megbrain/graph.h"
 #include "megbrain/imperative/physical_tensor.h"
+#include "megbrain/imperative/utils/to_string.h"
 
 namespace mgb {
 namespace imperative {
@@ -80,8 +81,15 @@ public:
             const SmallVector<bool>& input_requires_grad,
             const SmallVector<bool>& output_has_grad);
 
+    static std::vector<std::pair<const char*, std::string>> props(const OpDef& def);
+
     const OpTrait* trait() const;
 
+    const char* name() const;
+
+    std::string to_string() const;
+
     virtual size_t hash() const;
 
     virtual bool is_same_st(const Hashable&) const;
@@ -96,6 +104,16 @@ public:
     }
 };
 
+template <>
+struct ToStringTrait<OpDef*> {
+    std::string operator()(OpDef* op) const {
+        if (op == nullptr) {
+            return "nullptr";
+        }
+        return op->to_string();
+    }
+};
+
 } // namespace imperative
 } // namespace mgb
imperative/src/include/megbrain/imperative/profiler.h
@@ -11,10 +11,12 @@
 #pragma once
 
-#include <any>
 #include <optional>
-#include <stack>
-#include <list>
+#include <map>
+#include <variant>
+#include <fstream>
+#include <chrono>
+#include <bitset>
 
 #include "megbrain/comp_node.h"
 #include "megbrain/graph/event.h"
@@ -27,89 +29,298 @@
 namespace mgb {
 namespace imperative {
 
-using ProfileTensor = std::tuple<size_t, std::vector<size_t>, CompNode>;
-
-struct ProfileEntry {
-    using TimeClosure = std::function<double()>;
-    size_t id;
-    size_t parent;
-    std::shared_ptr<OpDef> op;
-    //(host_begin, host_end)
-    std::tuple<double, double> host;
-    //[(device, device_begin, device_end)]
-    std::vector<std::tuple<CompNode, TimeClosure, TimeClosure>> device_list;
-    std::vector<ProfileTensor> inputs;
-    std::vector<ProfileTensor> outputs;
-    long long memory = 0;
-    long long computation = 0;
-#if MGB_ENABLE_JSON
-    std::shared_ptr<json::Value> param;
-#endif
-    void wait_device() {
-        for (auto& [cn, begin, end]: device_list) {
-            MGB_MARK_USED_VAR(cn);
-            begin = [begin = begin()] { return begin; };
-            end = [end = end()] { return end; };
-        }
-    }
-};
-
-using Profile = std::list<ProfileEntry>;
-
 class DeviceTimer {
 public:
     using SharedEvent = std::shared_ptr<CompNode::Event>;
     DeviceTimer() = default;
-    void reset(thin_function<double()> host_timer);
-    thin_function<double()> get_device_time(CompNode device);
-    void clear();
-private:
-    CompNode::UnorderedMap<std::tuple<SharedEvent, double>> m_base_event_table;
-    thin_function<double()> m_host_timer;
+    SharedEvent get_device_time(CompNode device);
+    SmallVector<SharedEvent> get_all(SmallVector<CompNode> device_list);
 };
 
-class TensorRecorder {
-private:
-    // active tensors
-    std::unordered_map<Tensor*, std::tuple<std::weak_ptr<Tensor>, size_t>> m_tensor_map;
-    size_t m_next_id;
-public:
-    size_t record_tensor(const TensorPtr& tensor);
-    void clear();
-};
-
-class Profiler {
-public:
-    enum Flags {
-        PROFILE_FOOTPRINT = 1,
-    };
-public:
-    Profiler() = default;
-    // Start profiler by hook OpTrait
-    void start(uint32_t flags);
-    // Stop profiler and clean environment
-    void stop();
-    void clear();
-    Profile& get_profile();
-private:
-    DeviceTimer m_device_timer;
-    RealTimer m_host_timer;
-    Profile m_profile;
-    TensorRecorder m_tensor_recorder;
-    std::stack<std::tuple<const OpDef*, ProfileEntry*, std::thread::id>>
-            m_entry_stack;
-    // Hold profile owned by this Profiler
-    std::unique_ptr<Profile> m_owned_profile;
-    // Hold hooks, cleared when stop
-    std::vector<std::any> m_hooker_list;
-    size_t m_entry_count = 0;
-    Spinlock m_lock;
-    std::unordered_map<Tensor*, std::weak_ptr<Tensor>> m_recorded_tensors;
-};
+class HostTimer {
+public:
+    void reset();
+    double get_msecs();
+    double get_started_at();
+private:
+    decltype(std::chrono::steady_clock::now()) m_start;
+    double m_started_at;
+};
+
+class ProfilerBase {
+public:
+    using Host = std::thread::id;
+    using Device = CompNode;
+
+    struct HostInstant {
+        Host tid;
+        double time;
+
+        void wait() {}
+    };
+
+    struct DeviceInstant {
+        double before;
+        std::shared_ptr<CompNode::Event> event;
+        double after;
+
+        void wait() {
+            event->host_wait();
+        }
+    };
+
+    using Instant = std::variant<HostInstant, DeviceInstant>;
+
+    template <typename TEvent>
+    struct EventRecord {
+        Instant instant;
+        TEvent data;
+
+        HostInstant& host() {
+            return std::get<HostInstant>(instant);
+        }
+
+        DeviceInstant device() {
+            return std::get<DeviceInstant>(instant);
+        }
+
+        void wait() {
+            std::visit([&](auto& instant){ instant.wait(); }, instant);
+        }
+    };
+
+protected:
+    HostInstant record_host() {
+        return {std::this_thread::get_id(), m_host_timer.get_msecs()};
+    }
+
+    DeviceInstant record_device(Device device) {
+        auto before = m_host_timer.get_msecs();
+        auto event = m_device_timer.get_device_time(device);
+        auto after = m_host_timer.get_msecs();
+        return {before, event, after};
+    }
+
+protected:
+    std::atomic_int64_t m_last_id = 0;
+    HostTimer m_host_timer;
+    DeviceTimer m_device_timer;
+    Spinlock m_lock;
+};
+
+template <typename... TEvents>
+class Profiler: public ProfilerBase {
+public:
+    using Record = std::variant<EventRecord<TEvents>...>;
+    using Mask = std::bitset<sizeof...(TEvents)>;
+
+    struct Data {
+        std::vector<Record> records;
+        double started_at;
+    };
+
+    template <typename TEvent, size_t index = 0>
+    static constexpr size_t index_of() {
+        if constexpr (index == std::variant_size_v<Record>) {
+            return index;
+        } else if constexpr (std::is_same_v<EventRecord<TEvent>, std::variant_alternative_t<index, Record>>) {
+            return index;
+        } else {
+            return index_of<TEvent, index + 1>();
+        }
+    };
+
+    template <typename... TEvents2>
+    static Mask mask_of() {
+        return Mask{} | (Mask{}.set(index_of<TEvents2>()) | ...);
+    }
+
+    enum Status {
+        NotStarted, Profiling, Stopped
+    };
+
+public:
+    template <typename TEvent, typename... TArgs>
+    void record_host(TArgs&&... args) {
+        auto instant = HostInstant{std::this_thread::get_id(), m_host_timer.get_msecs()};
+        MGB_LOCK_GUARD(m_lock);
+        if (!m_event_mask.test(index_of<TEvent>())) {
+            return;
+        }
+        mgb_assert(m_status != Stopped, "record after stop");
+        m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
+    }
+
+    template <typename TEvent, typename... TArgs>
+    void record_device(Device device, TArgs&&... args) {
+        auto before = m_host_timer.get_msecs();
+        auto event = m_device_timer.get_device_time(device);
+        auto after = m_host_timer.get_msecs();
+        auto instant = DeviceInstant{before, event, after};
+        MGB_LOCK_GUARD(m_lock);
+        if (!m_event_mask.test(index_of<TEvent>())) {
+            return;
+        }
+        mgb_assert(m_status != Stopped, "record after stop");
+        m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
+    }
+
+    void start(Mask mask) {
+        MGB_LOCK_GUARD(m_lock);
+        mgb_assert(m_status == NotStarted, "profiler already started");
+        m_status = Profiling;
+        m_event_mask = mask;
+        m_host_timer.reset();
+    }
+
+    Data stop() {
+        MGB_LOCK_GUARD(m_lock);
+        mgb_assert(m_status == Profiling, "profiler not active");
+        m_status = Stopped;
+        for (auto&& record: m_record_list) {
+            std::visit([&](auto& record){ record.wait(); }, record);
+        }
+        auto records = std::move(m_record_list);
+        return { records, m_host_timer.get_started_at() };
+    }
+
+protected:
+    std::vector<Record> m_record_list;
+    Mask m_event_mask;
+    Status m_status = NotStarted;
+};
+
+class ChromeTraceEvent {
+public:
+    ChromeTraceEvent& name(std::string name) {
+        m_name = std::move(name);
+        return *this;
+    }
+    ChromeTraceEvent& tid(uint64_t tid) {
+        m_tid = std::move(tid);
+        return *this;
+    }
+    ChromeTraceEvent& cat(std::string cat) {
+        m_cat = std::move(cat);
+        return *this;
+    }
+    ChromeTraceEvent& pid(uint64_t pid) {
+        m_pid = pid;
+        return *this;
+    }
+    ChromeTraceEvent& id(uint64_t id) {
+        m_id = id;
+        return *this;
+    }
+    ChromeTraceEvent& idx(uint64_t idx) {
+        m_idx = idx;
+        return *this;
+    }
+    ChromeTraceEvent& ts(double ts) {
+        m_ts = ts;
+        return *this;
+    }
+    ChromeTraceEvent& dur(double dur) {
+        m_dur = dur;
+        return *this;
+    }
+    ChromeTraceEvent& ph(char ph) {
+        m_ph = ph;
+        return *this;
+    }
+    ChromeTraceEvent& bp(char bp) {
+        m_bp = bp;
+        return *this;
+    }
+    ChromeTraceEvent& args(std::shared_ptr<json::Object> args) {
+        m_args = std::move(args);
+        return *this;
+    }
+    ChromeTraceEvent& arg(std::string key, std::string value) {
+        if (!m_args) {
+            m_args = json::Object::make();
+        }
+        (*m_args)[key] = json::String::make(value);
+        return *this;
+    }
+    ChromeTraceEvent& arg(std::string key, double value) {
+        if (!m_args) {
+            m_args = json::Object::make();
+        }
+        (*m_args)[key] = json::Number::make(value);
+        return *this;
+    }
+    ChromeTraceEvent& arg(std::string key, std::shared_ptr<json::Value> value) {
+        if (!m_args) {
+            m_args = json::Object::make();
+        }
+        (*m_args)[key] = value;
+        return *this;
+    }
+
+    std::shared_ptr<json::Object> to_json() const {
+        auto result = json::Object::make();
+        auto prop_str = [&](auto key, auto value) {
+            if (value.empty()) {
+                return;
+            }
+            (*result)[key] = json::String::make(value);
+        };
+        auto prop_num = [&](auto key, auto value) {
+            if (!value) {
+                return;
+            }
+            (*result)[key] = json::Number::make(value.value());
+        };
+        auto prop_char = [&](auto key, auto value) {
+            if (!value) {
+                return;
+            }
+            (*result)[key] = json::String::make(std::string{} + value.value());
+        };
+        prop_str("name", m_name);
+        prop_num("tid", m_tid);
+        prop_str("cat", m_cat);
+        prop_num("pid", m_pid);
+        prop_num("id", m_id);
+        prop_num("idx", m_idx);
+        prop_num("ts", m_ts);
+        prop_num("dur", m_dur);
+        prop_char("ph", m_ph);
+        prop_char("bp", m_bp);
+        if (m_args) {
+            (*result)["args"] = m_args;
+        }
+        return result;
+    }
+private:
+    std::string m_name;
+    std::string m_cat;
+
+    std::optional<uint64_t> m_tid;
+    std::optional<uint64_t> m_pid;
+    std::optional<uint64_t> m_id;
+    std::optional<uint64_t> m_idx;
+    std::optional<double> m_ts;
+    std::optional<double> m_dur;
+    std::optional<char> m_ph;
+    std::optional<char> m_bp;
+    std::shared_ptr<json::Object> m_args;
+};
+
+class ChromeTraceEventList {
+public:
+    ChromeTraceEvent& new_event() {
+        m_content.emplace_back();
+        return m_content.back();
+    }
+
+    std::shared_ptr<json::Array> to_json() {
+        auto result = json::Array::make();
+        for (auto&& event: m_content) {
+            result->add(event.to_json());
+        }
+        return result;
+    }
+private:
+    std::vector<ChromeTraceEvent> m_content;
+};
 
 } // namespace imperative
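As a quick illustration of the builder above (a sketch, not code from the diff): each profiler record is ultimately rendered as one Chrome-trace event, either as a 'B'/'E' begin-end pair or as a single 'X' event carrying an explicit duration. The snippet below only uses methods declared in this header; the surrounding function and the mgb::json::Array spelling are assumptions.

    // Hypothetical: emit one complete duration event into a list and dump it.
    std::shared_ptr<mgb::json::Array> make_demo_trace() {
        mgb::imperative::ChromeTraceEventList events;
        events.new_event().name("OpExecute").ph('X').ts(12.5).dur(3.0).tid(1).pid(0)
              .arg("name", "Elemwise");
        return events.to_json();  // ready to be written into a chrome://tracing file
    }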
imperative/src/include/megbrain/imperative/utils/to_string.h
0 → 100644
/**
 * \file imperative/src/include/megbrain/imperative/utils/to_string.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once

#include <string>
#include <type_traits>
#include <memory>
#include <tuple>

#include "megbrain/utils/small_vector.h"
#include "megbrain/tensor.h"

namespace mgb::imperative {

template <typename T>
struct ToStringTrait;

template <typename T>
std::string to_string(const T& value) {
    return ToStringTrait<T>{}(value);
}

template <typename T>
struct ToStringTrait {
    std::string operator()(const T& value) const {
        return std::to_string(value);
    }
};

template <>
struct ToStringTrait<std::string> {
    std::string operator()(const std::string& value) const {
        return value;
    }
};

template <typename T, unsigned N>
struct ToStringTrait<SmallVector<T, N>> {
    std::string operator()(const SmallVector<T, N>& sv) const {
        if (sv.empty()) {
            return "[]";
        }
        std::string result = "[";
        result += to_string(sv[0]);
        for (size_t i = 1; i < sv.size(); ++i) {
            result += ", ";
            result += to_string(sv[i]);
        }
        return result + "]";
    }
};

template <typename T>
struct ToStringTrait<std::shared_ptr<T>> {
    std::string operator()(const std::shared_ptr<T>& sp) const {
        return to_string(sp.get());
    }
};

template <typename TKey, typename TValue>
struct ToStringTrait<std::pair<TKey, TValue>> {
    std::string operator()(const std::pair<TKey, TValue>& pr) const {
        return "(" + to_string(pr.first) + ", " + to_string(pr.second) + ")";
    }
};

template <typename TItem, typename... TItems>
struct ToStringTrait<std::tuple<TItem, TItems...>> {
    std::string operator()(const std::tuple<TItem, TItems...>& tp) const {
        auto folder = [&](auto... item){ return (... + ("," + to_string(item))); };
        return "(" + std::apply(folder, tp) + ")";
    }
};

template <typename T>
struct ToStringTrait<T*> {
    std::string operator()(T* p) const {
        return ssprintf("%p", p);
    }
};

template <>
struct ToStringTrait<TensorShape> {
    std::string operator()(TensorShape shape) const {
        if (shape.ndim > TensorShape::MAX_NDIM) {
            printf("ndim: %d\n", (int)shape.ndim);
            return "[]";
        }
        mgb_assert(shape.ndim <= TensorShape::MAX_NDIM);
        if (shape.ndim == 0) {
            return "[ ]";
        }
        std::string result = "[ " + std::to_string(shape[0]);
        for (size_t i = 1; i < shape.ndim; i++) {
            result += ", ";
            result += std::to_string(shape[i]);
        }
        return result + " ]";
    }
};

template <>
struct ToStringTrait<DType> {
    std::string operator()(DType dtype) const {
        return dtype.name();
    }
};

template <>
struct ToStringTrait<CompNode> {
    std::string operator()(CompNode device) const {
        return device.to_string();
    }
};

}
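A short, illustrative example of the trait in action (assuming only this header): the specializations compose, so nested containers stringify recursively through the single to_string entry point.

    #include "megbrain/imperative/utils/to_string.h"

    using namespace mgb::imperative;

    std::string demo() {
        SmallVector<std::pair<int, std::string>> kv{{1, "conv"}, {2, "relu"}};
        return to_string(kv);  // -> "[(1, conv), (2, relu)]"
    }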
imperative/tablegen/autogen.cpp
@@ -222,10 +222,25 @@ static void gen_op_def_c_body_single(raw_ostream &os, MgbOp& op) {
         os << mlir::tblgen::tgfmt(hashable->getCmpFunctionTemplate(), &ctx, "a_", "b_");
         os << "}\n";
 
+        // generate props()
+        os << formatv(
+            "std::vector<std::pair<const char*, std::string>> {0}(const OpDef& def_) {{\n",
+            formatMethImpl("props"));
+        os << formatv(
+            "    auto&& op_ = def_.cast_final_safe<{0}>();\n"
+            "    static_cast<void>(op_);\n", className);
+        ctx.withSelf("op_");
+        os << mlir::tblgen::tgfmt(hashable->getPropsFunctionTemplate(), &ctx);
+        os << "}\n";
+
         os << "} // anonymous namespace\n";
 
         methods.push_back("hash");
         methods.push_back("is_same_st");
+        methods.push_back("props");
     }
     if (!methods.empty()) {
         os << formatv(
@@ -423,7 +438,7 @@ EnumWrapper<{0}::{1}>::type2str = {{
     std::vector<std::string> getsetters;
     for (auto &&i : op.getMgbAttributes()) {
         getsetters.push_back(formatv(
-            "{{\"{1}\", py_get_generic({0}, {1}), py_set_generic({0}, {1}), \"{1}\", NULL},",
+            "{{const_cast<char*>(\"{1}\"), py_get_generic({0}, {1}), py_set_generic({0}, {1}), const_cast<char*>(\"{1}\"), NULL},",
             className, i.name));
     }
imperative/tablegen/helper.h
@@ -66,7 +66,7 @@ struct MgbEnumAttrMixin : public MgbAttrWrapperBase {
     }
 
     llvm::StringRef getParentNamespace() const {
-        return getBaseRecord()->getValueAsString("parentNamespce");
+        return getBaseRecord()->getValueAsString("parentNamespace");
     }
     llvm::StringRef getEnumName() const {
         return getBaseRecord()->getValueAsString("enumName");
@@ -87,6 +87,9 @@ struct MgbHashableAttrMixin : public MgbAttrWrapperBase {
     llvm::StringRef getCmpFunctionTemplate() const {
         return getBaseRecord()->getValueAsString("cmpFunction");
     }
+    llvm::StringRef getReprFunctionTemplate() const {
+        return getBaseRecord()->getValueAsString("reprFunction");
+    }
 };
 
 struct MgbAliasAttrMixin : public MgbAttrWrapperBase {
@@ -205,6 +208,39 @@ private:
         body += "    return true;\n";
         return body;
     }
+
+    std::string getDefaultPropsFunction() const {
+        std::string body = "    std::vector<std::pair<const char*, std::string>> props_;\n";
+        if (!getMgbAttributes().empty()) {
+            mlir::tblgen::FmtContext ctx;
+            for (auto&& it : getMgbAttributes()) {
+                if (auto* enumAttr = llvm::dyn_cast<MgbEnumAttrMixin>(&it.attr)) {
+                    body += formatv("    switch ({0}){{\n", "$_self." + it.name);
+                    for (auto&& enumMember : enumAttr->getEnumMembers()) {
+                        body += formatv("    case {0}::{1}::{2}:\n",
+                                getCppClassName(), enumAttr->getEnumName(), enumMember);
+                        body += formatv("        props_.emplace_back(\"{0}\", \"{1}\");\n",
+                                it.name, enumMember);
+                        body += "        break;\n";
+                    }
+                    body += "    default: break;\n";
+                    body += "    }\n";
+                } else {
+                    auto&& attr = llvm::cast<MgbHashableAttrMixin>(it.attr);
+                    body += formatv("    props_.emplace_back(\"{0}\", {1});\n",
+                            it.name,
+                            mlir::tblgen::tgfmt(attr.getReprFunctionTemplate(), &ctx, "$_self." + it.name));
+                }
+            }
+        }
+        body += "    return props_;\n";
+        return body;
+    }
+
 public:
     static bool classof(const Operator* op) {
         return op->getDef().isSubClassOf("MgbHashableOpMixin");
@@ -222,7 +258,13 @@ public:
         }
         return getDefaultCmpFunction();
     }
+    std::string getPropsFunctionTemplate() const {
+        if (auto f = getDef().getValueAsOptionalString("propsFunction")) {
+            return f.getValue().str();
+        }
+        return getDefaultPropsFunction();
+    }
 };
 
 } // namespace tblgen
 } // namespace mlir
\ No newline at end of file
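To show what getDefaultPropsFunction() ends up emitting, here is a hypothetical rendering of the generated props() body for an imaginary op named FakeOp with an enum attribute `mode` (members NEAREST/LINEAR) and an integer `axis`. The op, attribute names, and the emitted function name are all illustrative; only the code shape follows the templates above.

    // Hypothetical tablegen output for FakeOp (names are illustrative).
    std::vector<std::pair<const char*, std::string>> props_FakeOp(const OpDef& def_) {
        auto&& op_ = def_.cast_final_safe<FakeOp>();
        static_cast<void>(op_);
        std::vector<std::pair<const char*, std::string>> props_;
        switch (op_.mode){
        case FakeOp::Mode::NEAREST:
            props_.emplace_back("mode", "NEAREST");
            break;
        case FakeOp::Mode::LINEAR:
            props_.emplace_back("mode", "LINEAR");
            break;
        default: break;
        }
        props_.emplace_back("axis", std::to_string(op_.axis));
        return props_;
    }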
src/core/include/megbrain/ir/base.td
@@ -30,6 +30,7 @@ class MgbHashableAttrMixin {
   string hashFunction = "mgb::hash($0)";
   // return 0 for eq, else for ne
   string cmpFunction = "$0 != $1";
+  string reprFunction = "std::to_string($0)";
 }
 
 class MgbEnumAttrMixin<string namespace, string name, list<string> members> {
@@ -98,6 +99,7 @@ def MgbStringAttr : HashableAttr<"std::string"> {
   let storageType = "::mlir::StringAttr";
   let convertFromStorage = "$_self.getValue().str()";
   let constBuilderCall = "$_builder.getStringAttr($0)"; // llvm::StringRef implicit ctor
+  string reprFunction = "$0";
 }
 
 class MgbArrayAttr<MgbAttrWrapper elem>:
@@ -123,6 +125,7 @@ class MgbArrayAttr<MgbAttrWrapper elem>:
   "  });\n"
   "  return $_builder.getArrayAttr(ret" # recursionDepth # ");"
   "}()";
+  let reprFunction = "\"{std::vector}\"";
 }
 
 defvar EmptyStrList = !listsplat("", 0);
@@ -168,6 +171,7 @@ class MgbEnumAttr<string namespace, string enumName, list<string> members>:
   let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
   let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0))";
   let hashFunction = "mgb::enumhash()($0)";
+  string reprFunction = "std::to_string((int)$0)";
 }
 
 class MgbEnumAliasAttr<string namespace, string enumName, MgbEnumAttr base>:
@@ -179,12 +183,14 @@ def MgbDTypeAttr: HashableAttr<"::megdnn::DType"> {
   let convertFromStorage = underlyingType # "::from_enum(static_cast<::megdnn::DTypeEnum>($_self.getInt()))";
   let constBuilderCall = "$_builder.getI32IntegerAttr(static_cast<int32_t>($0.enumv()))";
   let hashFunction = "mgb::hash($0.handle())";
+  let reprFunction = "$0.name()";
 }
 
 def MgbCompNodeAttr: HashableAttr<"::mgb::CompNode"> {
   let storageType = "::mlir::StringAttr";
   let convertFromStorage = underlyingType # "::load($_self.getValue().str())";
   let constBuilderCall = "$_builder.getStringAttr($0.to_string_logical())";
+  string reprFunction = "$0.to_string()";
 }
 
 def MgbTensorShapeAttr: HashableAttr<"::megdnn::TensorShape"> {
@@ -209,6 +215,7 @@ def MgbTensorShapeAttr: HashableAttr<"::megdnn::TensorShape"> {
   "  }\n"
   "  return $_builder.getArrayAttr(ret);"
   "}()";
+  let reprFunction = "$0.to_string()";
 }
 
 class MgbDefaultValuedAttr<MgbAttrWrapper attr, string value>: