Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
3246ee5e
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
3246ee5e
编写于
9月 27, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(mge): use DepType::HOST_VALUE in trace when possible
GitOrigin-RevId: 5d47ed263fe0c5d65f86d53d61bd1c427139c06d
上级
0e303710
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
41 addition
and
6 deletion
+41
-6
imperative/python/src/graph_rt.cpp
imperative/python/src/graph_rt.cpp
+3
-3
imperative/src/impl/opr_utility.cpp
imperative/src/impl/opr_utility.cpp
+13
-2
imperative/src/include/megbrain/imperative/opr_utility.h
imperative/src/include/megbrain/imperative/opr_utility.h
+3
-1
imperative/src/test/opr_utility.cpp
imperative/src/test/opr_utility.cpp
+22
-0
未找到文件。
imperative/python/src/graph_rt.cpp
浏览文件 @
3246ee5e
...
@@ -483,13 +483,13 @@ void init_graph_rt(py::module m) {
...
@@ -483,13 +483,13 @@ void init_graph_rt(py::module m) {
},
},
py
::
arg
(),
py
::
arg
(),
py
::
arg
(),
py
::
arg
()
=
py
::
none
(),
py
::
arg
()
=
py
::
tuple
(),
py
::
arg
(
"graph"
)
=
py
::
none
());
py
::
arg
(),
py
::
arg
(),
py
::
arg
(),
py
::
arg
()
=
py
::
none
(),
py
::
arg
()
=
py
::
tuple
(),
py
::
arg
(
"graph"
)
=
py
::
none
());
auto
output_callback
=
[](
auto
callback
,
const
std
::
vector
<
cg
::
VarNode
*>&
inputs
,
bool
borrow
=
false
)
{
auto
output_callback
=
[](
auto
callback
,
const
std
::
vector
<
cg
::
VarNode
*>&
inputs
,
bool
borrow
=
false
,
bool
prefer_host_value
=
false
)
{
SymbolVarArray
sinputs
;
SymbolVarArray
sinputs
;
for
(
auto
i
:
inputs
)
{
for
(
auto
i
:
inputs
)
{
sinputs
.
emplace_back
(
i
);
sinputs
.
emplace_back
(
i
);
}
}
static_assert
(
!
std
::
is_reference
<
decltype
(
callback
)
>::
value
);
static_assert
(
!
std
::
is_reference
<
decltype
(
callback
)
>::
value
);
opr
::
OutputCallback
::
Param
param
{
std
::
move
(
callback
),
borrow
};
opr
::
OutputCallback
::
Param
param
{
std
::
move
(
callback
),
borrow
,
prefer_host_value
};
auto
output
=
opr
::
OutputCallback
::
make
(
std
::
move
(
param
),
sinputs
);
auto
output
=
opr
::
OutputCallback
::
make
(
std
::
move
(
param
),
sinputs
);
return
output
.
node
();
return
output
.
node
();
};
};
...
@@ -519,7 +519,7 @@ void init_graph_rt(py::module m) {
...
@@ -519,7 +519,7 @@ void init_graph_rt(py::module m) {
hv_with_event
.
second
->
record
();
hv_with_event
.
second
->
record
();
p
->
set
(
std
::
move
(
hv_with_event
));
p
->
set
(
std
::
move
(
hv_with_event
));
};
};
return
output_callback
(
std
::
move
(
f
),
std
::
move
(
inputs
),
true
);
return
output_callback
(
std
::
move
(
f
),
std
::
move
(
inputs
),
true
,
true
);
});
});
m
.
def
(
"attr_output_callback"
,
[
output_callback
](
std
::
shared_ptr
<
Rendezvous
<
TensorAttr
>>
p
,
std
::
vector
<
cg
::
VarNode
*>
inputs
)
{
m
.
def
(
"attr_output_callback"
,
[
output_callback
](
std
::
shared_ptr
<
Rendezvous
<
TensorAttr
>>
p
,
std
::
vector
<
cg
::
VarNode
*>
inputs
)
{
...
...
imperative/src/impl/opr_utility.cpp
浏览文件 @
3246ee5e
...
@@ -144,13 +144,24 @@ cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const {
...
@@ -144,13 +144,24 @@ cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const {
prop
->
add_flag
(
NodeProp
::
Flag
::
NO_AUTOMATIC_DUP
);
prop
->
add_flag
(
NodeProp
::
Flag
::
NO_AUTOMATIC_DUP
);
SmallVector
<
NodeProp
::
DepType
>
dep_types
(
input
().
size
(),
SmallVector
<
NodeProp
::
DepType
>
dep_types
(
input
().
size
(),
NodeProp
::
DepType
::
DEV_COMP_ORDER
);
NodeProp
::
DepType
::
DEV_COMP_ORDER
);
dep_types
[
0
]
=
NodeProp
::
DepType
::
DEV_VALUE
;
using
IT
=
cg
::
static_infer
::
InferType
;
auto
host_value_avail
=
[
&
]()
->
bool
{
auto
inp
=
input
(
0
);
auto
it
=
owner_graph
()
->
static_infer_manager
().
get_infer_type
(
inp
).
value
;
return
it
&
(
IT
::
CONST
|
IT
::
RT_STATIC
|
IT
::
MISSING_INP
);
};
m_use_host_value
=
m_param
.
prefer_host_value
&&
host_value_avail
();
dep_types
[
0
]
=
m_use_host_value
?
NodeProp
::
DepType
::
HOST_VALUE
:
NodeProp
::
DepType
::
DEV_VALUE
;
prop
->
reset_dep_type
(
input
(),
dep_types
);
prop
->
reset_dep_type
(
input
(),
dep_types
);
return
prop
;
return
prop
;
}
}
void
OutputCallback
::
scn_do_execute
()
{
void
OutputCallback
::
scn_do_execute
()
{
if
(
m_use_host_value
)
{
m_param
.
callback
(
owner_graph
()
->
static_infer_manager
().
infer_value
(
input
(
0
)));
}
else
{
m_param
.
callback
(
input
(
0
)
->
dev_tensor
());
m_param
.
callback
(
input
(
0
)
->
dev_tensor
());
}
}
}
cg
::
OperatorNodeBase
*
OutputCallback
::
shallow_copy
(
cg
::
OperatorNodeBase
*
OutputCallback
::
shallow_copy
(
...
...
imperative/src/include/megbrain/imperative/opr_utility.h
浏览文件 @
3246ee5e
...
@@ -60,7 +60,8 @@ public:
...
@@ -60,7 +60,8 @@ public:
using
callback_t
=
thin_function
<
void
(
DeviceTensorND
)
>
;
using
callback_t
=
thin_function
<
void
(
DeviceTensorND
)
>
;
struct
Param
{
struct
Param
{
callback_t
callback
;
callback_t
callback
;
bool
borrow
=
false
;
bool
borrow
=
false
;
// do not obtain shared ownership on DeviceTensorND
bool
prefer_host_value
=
false
;
// use host value when possible
};
};
OutputCallback
(
Param
param
,
OutputCallback
(
Param
param
,
const
VarNodeArray
&
inputs
,
const
VarNodeArray
&
inputs
,
...
@@ -81,6 +82,7 @@ protected:
...
@@ -81,6 +82,7 @@ protected:
NodeProp
*
do_make_node_prop
()
const
override
;
NodeProp
*
do_make_node_prop
()
const
override
;
private:
private:
Param
m_param
;
Param
m_param
;
mutable
bool
m_use_host_value
;
};
};
MGB_DEFINE_OPR_CLASS
(
NopCallback
,
cg
::
OperatorNodeBase
)
// {
MGB_DEFINE_OPR_CLASS
(
NopCallback
,
cg
::
OperatorNodeBase
)
// {
...
...
imperative/src/test/opr_utility.cpp
浏览文件 @
3246ee5e
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
#include "megbrain/opr/io.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/utility.h"
#include "megbrain/opr/utility.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/helper.h"
using
namespace
mgb
;
using
namespace
mgb
;
...
@@ -50,6 +51,27 @@ TEST(TestOprUtility, OutputCallback) {
...
@@ -50,6 +51,27 @@ TEST(TestOprUtility, OutputCallback) {
MGB_ASSERT_TENSOR_EQ
(
hy
,
*
hx
);
MGB_ASSERT_TENSOR_EQ
(
hy
,
*
hx
);
}
}
TEST
(
TestOprUtility
,
OutputCallbackPreferHost
)
{
HostTensorGenerator
<>
gen
;
auto
hx
=
gen
({
2
,
3
});
auto
graph
=
ComputingGraph
::
make
();
auto
x
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
hx
);
x
=
opr
::
GetVarShape
::
make
(
x
);
HostTensorND
hy
;
auto
callback
=
[
&
hy
](
DeviceTensorND
dv
)
{
hy
.
copy_from
(
dv
);};
opr
::
OutputCallback
::
Param
param
{
callback
};
param
.
prefer_host_value
=
true
;
auto
dummy
=
opr
::
OutputCallback
::
make
(
param
,
x
);
auto
y
=
opr
::
VirtualDep
::
make
({
x
,
dummy
});
ComputingGraph
::
OutputSpec
outspec
{{
y
,
[](
DeviceTensorND
&
){}}};
auto
func
=
graph
->
compile
(
outspec
);
func
->
execute
();
ASSERT_TRUE
(
hy
.
comp_node
()
==
CompNode
::
default_cpu
());
ASSERT_EQ
(
hy
.
ptr
<
int
>
()[
0
],
2
);
ASSERT_EQ
(
hy
.
ptr
<
int
>
()[
1
],
3
);
}
TEST
(
TestOprUtility
,
NopCallback
)
{
TEST
(
TestOprUtility
,
NopCallback
)
{
HostTensorGenerator
<>
gen
;
HostTensorGenerator
<>
gen
;
auto
hx
=
gen
({
2
,
3
});
auto
hx
=
gen
({
2
,
3
});
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录