Commit c17d6038
Authored 3 years ago by Megvii Engine Team
feat(mgb): allow output tensor's ptr change when record
GitOrigin-RevId: c610c8bf9a6d97885d9da3f2e447b57f25747dee
Parent: 26634db7
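
What this commit does: with graph option comp_node_seq_record_level = 1 on a CPU comp node, the raw storage pointer of an output tensor may now change between executions of a recorded function. To support this, output callbacks are recorded as whole dispatched tasks, so synchronizing inside the callback is no longer supported; callers sync with func->wait() after execute() instead.

A minimal usage sketch, assuming the test helpers seen later in this commit (make_callback_copy with its new sync argument, and a compiled graph producing z):

    // record level 1: pass sync=false so the callback does not sync, and call
    // func->wait() after execute() to sync data from device to host
    graph->options().comp_node_seq_record_level = 1;
    auto func = graph->compile({make_callback_copy(z, host_z, false)});
    func->execute();
    func->wait();

    // the output tensor's backing storage may now be swapped between runs;
    // `fresh` is a hypothetical HostTensorND providing the new raw storage
    auto host_z_storage = host_z.storage();
    host_z_storage.reset(
            host_z.comp_node(), host_z_storage.size(),
            fresh->storage().raw_storage());
    func->execute();
    func->wait();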
Showing 11 changed files with 197 additions and 15 deletions (+197, -15):
lite/src/mge/network_impl.cpp               +3   -0
src/core/impl/comp_node/comp_node.cpp       +17  -0
src/core/impl/comp_node/cpu/comp_node.cpp   +6   -0
src/core/impl/comp_node_env.cpp             +31  -0
src/core/impl/graph/cg_impl.cpp             +22  -2
src/core/include/megbrain/comp_node.h       +14  -0
src/core/include/megbrain/comp_node_env.h   +17  -4
src/core/test/comp_node_helper.cpp          +58  -5
src/core/test/graph/misc.cpp                +13  -2
src/core/test/graph/multi_thread.cpp        +5   -1
src/plugin/test/opr_io_dump.cpp             +11  -1
lite/src/mge/network_impl.cpp
@@ -207,6 +207,9 @@ void NetworkImplDft::use_tensorrt() {
 //! set the callback in async model
 void NetworkImplDft::set_async_callback(const AsyncCallback& callback) {
     LITE_ASSERT(!m_is_cpu_inplace_mode, "cpu inplace mode not support async mode");
+    LITE_ASSERT(
+            m_user_config->options.comp_node_seq_record_level == 0,
+            "record mode not support async mode");
     LITE_ASSERT(
             m_user_config->device_type == LiteDeviceType::LITE_CPU ||
                     m_user_config->device_type == LiteDeviceType::LITE_CUDA,
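
The Lite-side change above is a guard rather than new behavior: async callbacks are now explicitly rejected when record mode (or CPU inplace mode) is enabled. A hedged illustration of the constraint from the user's side, assuming the lite::Config / lite::Network API (not part of this diff):

    lite::Config config;
    config.options.comp_node_seq_record_level = 1;       // record mode
    auto network = std::make_shared<lite::Network>(config);
    // network->set_async_callback(cb);  // would now trip the new LITE_ASSERT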
src/core/impl/comp_node/comp_node.cpp
@@ -659,4 +659,21 @@ void CompNode::ImplBase::add_callback(megdnn::thin_function<void()>&&) {
             locator().to_string().c_str());
 }
 
+void CompNode::ImplBase::enable_dispatch() {
+    mgb_throw(
+            MegBrainError,
+            "Unsupported add callback to "
+            "comp node %s",
+            locator().to_string().c_str());
+}
+
+void CompNode::ImplBase::disable_dispatch(bool* flag) {
+    MGB_MARK_USED_VAR(flag);
+    mgb_throw(
+            MegBrainError,
+            "Unsupported add callback to "
+            "comp node %s",
+            locator().to_string().c_str());
+}
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/core/impl/comp_node/cpu/comp_node.cpp
@@ -810,6 +810,12 @@ public:
             task();
         }
     }
+
+    void enable_dispatch() override { m_env.cpu_env().enable_dispatch(); }
+
+    void disable_dispatch(bool* flag) override {
+        m_env.cpu_env().disable_dispatch(flag);
+    }
 };
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(CompNodeRecorderImpl);
 #if MGB_HAVE_THREAD
src/core/impl/comp_node_env.cpp
@@ -474,4 +474,35 @@ void CompNodeEnv::on_bad_device_type(DeviceType expected) const {
 MGB_VERSION_SYMBOL3(MEGDNN, MEGDNN_MAJOR, MEGDNN_MINOR, MEGDNN_PATCH);
 
+void CompNodeEnv::CpuEnv::enable_dispatch() {
+    do_task_inplace = nullptr;
+}
+
+void CompNodeEnv::CpuEnv::disable_dispatch(bool* flag) {
+    do_task_inplace = flag;
+}
+
+void CompNodeEnv::CpuEnv::dispatch(Task&& task) const {
+    if (do_task_inplace && *do_task_inplace) {
+        task();
+    } else {
+        dispatcher->dispatch(std::move(task));
+    }
+}
+
+void CompNodeEnv::CpuEnv::dispatch(MultiThreadingTask&& task, size_t parallelism) const {
+    if (do_task_inplace && *do_task_inplace) {
+        for (size_t i = 0; i < parallelism; ++i) {
+            task(i, 0);
+        }
+    } else {
+        dispatcher->dispatch(std::move(task), parallelism);
+    }
+}
+
+#if MGB_HAVE_THREAD
+MGB_THREAD_LOCAL_PTR(bool) CompNodeEnv::CpuEnv::do_task_inplace = nullptr;
+#endif
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
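
The core mechanism is the do_task_inplace flag: while it points at a true value, CpuEnv::dispatch() runs tasks synchronously on the calling thread instead of forwarding them to the dispatcher, which is what lets a recorded callback perform its copies inline during replay. A self-contained sketch of the pattern (illustrative only, not MegEngine code):

    #include <functional>
    #include <queue>

    struct InplaceDispatcher {
        using Task = std::function<void()>;
        bool* do_task_inplace = nullptr;  // caller-owned; nullptr means normal dispatch
        std::queue<Task> pending;

        void dispatch(Task&& task) {
            if (do_task_inplace && *do_task_inplace) {
                task();  // bypass the queue, exactly as CpuEnv::dispatch does above
            } else {
                pending.push(std::move(task));
            }
        }
    };

    int main() {
        InplaceDispatcher d;
        bool inplace = true;
        d.do_task_inplace = &inplace;  // cf. disable_dispatch(&inplace)
        d.dispatch([] { /* runs immediately on this thread */ });
        d.do_task_inplace = nullptr;   // cf. enable_dispatch()
        d.dispatch([] { /* queued for later execution */ });
    }

Note the slightly inverted naming: disable_dispatch(flag) turns the dispatcher off (tasks run inline while *flag is true), and enable_dispatch() turns it back on.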
src/core/impl/graph/cg_impl.cpp
@@ -168,12 +168,32 @@ MGB_DEFINE_OPR_CLASS(
         ComputingGraphImpl::CallbackCaller, SingleCNOperatorNodeBase) // {
     std::vector<std::vector<ComputingGraph::Callback>> m_cb;
 
+    //! CallbackCaller supports change memory address in output tensor record mode(only
+    //! on CPU). The whole callback will be dispatched(like dispatching tensor copy
+    //! instead of dispatching memcpy).
+    //! Side effect: sync() is not supported in callback anymore. Users should call
+    //! func->wait() instead out of callback to sync data from Device to Host.
+    //! Note : only record level 1 supports change memory address in output tensor.
+    //! HostTensor captured in callback should not on cpu default.
     void scn_do_execute() override {
         for (size_t i = 0; i < input().size(); ++i) {
             auto&& in = input(i)->dev_tensor();
             for (auto&& callback : m_cb[i]) {
-                // const cast for backward API compatibility
-                callback(const_cast<DeviceTensorND&>(in));
+                if (this->owner_graph()->options().comp_node_seq_record_level == 1 &&
+                    in.comp_node().device_type() == CompNode::DeviceType::CPU &&
+                    in.comp_node() != CompNode::default_cpu()) {
+                    auto record_cb = [&in, &callback]() {
+                        auto comp_node = in.comp_node();
+                        bool do_task_inplace = true;
+                        comp_node.disable_dispatch(&do_task_inplace);
+                        callback(const_cast<DeviceTensorND&>(in));
+                        comp_node.enable_dispatch();
+                    };
+                    in.comp_node().add_callback(record_cb);
+                } else {
+                    // const cast for backward API compatibility
+                    callback(const_cast<DeviceTensorND&>(in));
+                }
             }
         }
     }
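
How this achieves pointer-change tolerance: during recording, CallbackCaller no longer invokes the callback directly. It wraps it in record_cb and submits it via add_callback(), so the recorder captures the entire callback as one task; on replay, record_cb runs again and reads the output tensor's current address. Inside record_cb, dispatch is disabled through the stack-local flag so that copies issued by the callback execute inline instead of being dispatched again. A hedged sketch of a callback compatible with this path, using the types above:

    // host_out must not live on CompNode::default_cpu(), per the note above
    HostTensorND host_out;
    auto cb = [&host_out](DeviceTensorND& in) {
        // reads in's raw pointer at call time, so a replayed record_cb picks up
        // a changed output address; no sync() here -- callers use func->wait()
        host_out.copy_from(in);
    };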
src/core/include/megbrain/comp_node.h
@@ -412,6 +412,16 @@ public:
         return m_impl->add_callback(std::move(cb));
     }
 
+    /*!
+     * enable dispatcher
+     */
+    void enable_dispatch() { m_impl->enable_dispatch(); }
+
+    /*!
+     * disable dispatcher so that task will be done inplace
+     */
+    void disable_dispatch(bool* flag) { m_impl->disable_dispatch(flag); }
+
     enum class Flag : uint32_t {
         //! Whether computing recorder is supported on this comp node (i.e.
         //! whether non-zero comp_node_seq_record_level is allowed)
@@ -532,6 +542,10 @@ protected:
     virtual void add_callback(megdnn::thin_function<void()>&&);
 
+    virtual void enable_dispatch();
+
+    virtual void disable_dispatch(bool* flag);
+
     virtual uint64_t get_uid() {
         mgb_throw(MegBrainError, "get_uid is not impl yet");
     };
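
The intended calling pattern for the new public pair mirrors its one in-tree user, CallbackCaller::scn_do_execute above: the flag lives on the caller's stack and must remain valid until enable_dispatch() restores normal queuing. A sketch (comp_node and run_host_copies are placeholders):

    bool do_task_inplace = true;
    comp_node.disable_dispatch(&do_task_inplace);  // tasks now run inline
    run_host_copies();                             // e.g. callback-driven host copies
    comp_node.enable_dispatch();                   // restore queued dispatch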
src/core/include/megbrain/comp_node_env.h
@@ -503,12 +503,19 @@ public:
     using AffinityCallBack = thin_function<void(size_t)>;
     std::shared_ptr<CPUDispatcher> dispatcher;
+#if MGB_HAVE_THREAD
+    static MGB_THREAD_LOCAL_PTR(bool) do_task_inplace;
+#else
+    bool* do_task_inplace = nullptr;
+#endif
-    void dispatch(Task&& task) const { dispatcher->dispatch(std::move(task)); }
+    void enable_dispatch();
-    void dispatch(MultiThreadingTask&& task, size_t parallelism) const {
-        dispatcher->dispatch(std::move(task), parallelism);
-    }
+    void disable_dispatch(bool* flag);
+    void dispatch(Task&& task) const;
+    void dispatch(MultiThreadingTask&& task, size_t parallelism) const;
     void set_affinity(AffinityCallBack&& cb) const {
         dispatcher->set_affinity(std::move(cb));
@@ -521,6 +528,12 @@ public:
         return m_cpu_env;
     }
 
+    CpuEnv& cpu_env() {
+        if (mgb_unlikely(m_property.type != DeviceType::CPU))
+            on_bad_device_type(DeviceType::CPU);
+        return m_cpu_env;
+    }
+
     //! init this as a cpu env
     void init_cpu(const CpuEnv& env, CompNode comp_node);
src/core/test/comp_node_helper.cpp
@@ -44,7 +44,7 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
         graph->options().fake_next_exec = true;
         graph->options().var_sanity_check_first_run = false;
     }
-    auto func = graph->compile({make_callback_copy(z, host_z)});
+    auto func = graph->compile({make_callback_copy(z, host_z, false)});
     if (fake_first) {
         func->execute();  // first exec
     }
@@ -55,6 +55,8 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
         }
         host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
         func->execute();
+        func->wait();
+        host_z.sync();
         auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
     }
@@ -70,6 +72,28 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
     ASSERT_EQ(executed[2], change);
     // create new recorder, exec with recorder
     ASSERT_EQ(executed[3], change + 1);
+
+    //! then we change host_z's ptr each time and check result
+    HostTensorND host_iter;
+    host_iter.copy_from(host_z);
+    std::vector<std::shared_ptr<HostTensorND>> m_hosts(10);
+    for (size_t i = 0; i < 10; i++) {
+        m_hosts[i] = gen(host_z.shape(), host_z.comp_node());
+    }
+    iter = 0;
+    for (; iter < 10; ++iter) {
+        auto host_tmp = m_hosts[iter];
+        auto host_z_storage = host_z.storage();
+        auto origin_ptr = host_z_storage.raw_storage();
+        host_z_storage.reset(
+                host_z.comp_node(), host_z_storage.size(),
+                host_tmp->storage().raw_storage());
+        auto changed_ptr = host_z_storage.raw_storage();
+        ASSERT_TRUE(origin_ptr != changed_ptr);
+        func->execute();
+        func->wait();
+        MGB_ASSERT_TENSOR_NEAR(host_iter, host_z, 1e-3) << "iter " << iter;
+    }
 }
 
 void run_comp_seq_rec_basic_level2(CompNode cn) {
@@ -154,7 +178,7 @@ void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
     w = opr::Elemwise::make({x, y, z}, opr::Elemwise::Mode::FUSE_MUL_ADD3);
     HostTensorND host_w;
-    auto func = graph->compile({make_callback_copy(w, host_w)});
+    auto func = graph->compile({make_callback_copy(w, host_w, false)});
     if (fake_first) {
         func->execute();
     }
@@ -166,9 +190,30 @@ void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
         }
         host_x->copy_from(*gen(host_x->shape(), cn));
         func->execute();
+        func->wait();
         auto expect = check();
         MGB_ASSERT_TENSOR_EQ(expect, host_w) << "iter " << i;
     }
+
+    //! then we change host_z's ptr each time and check result
+    HostTensorND host_iter;
+    host_iter.copy_from(host_w);
+    std::vector<std::shared_ptr<HostTensorND>> m_hosts(10);
+    for (size_t i = 0; i < 10; i++) {
+        m_hosts[i] = gen(host_w.shape(), host_w.comp_node());
+    }
+    for (size_t iter = 0; iter < 10; ++iter) {
+        auto host_tmp = m_hosts[iter];
+        auto host_w_storage = host_w.storage();
+        auto origin_ptr = host_w_storage.raw_storage();
+        host_w_storage.reset(
+                host_w.comp_node(), host_w_storage.size(),
+                host_tmp->storage().raw_storage());
+        auto changed_ptr = host_w_storage.raw_storage();
+        ASSERT_TRUE(origin_ptr != changed_ptr);
+        func->execute();
+        func->wait();
+        MGB_ASSERT_TENSOR_EQ(host_iter, host_w) << "iter " << iter;
+    }
 }
 
 void run_level2(CompNode cn, bool use_multi_holder) {
@@ -381,6 +426,9 @@ void run<sync_from_func>(CompNode cn) {
     HostTensorND host_y;
     graph->options().var_sanity_check_first_run = false;
     graph->options().comp_node_seq_record_level = level;
+    if (level == 1) {
+        sync = false;
+    }
     auto cb = [&](const DeviceTensorND& dv) {
         host_y.copy_from(dv);
         if (sync) {
@@ -418,6 +466,9 @@ void run<cb_non_contig>(CompNode cn) {
     HostTensorND host_y;
     graph->options().var_sanity_check_first_run = false;
     graph->options().comp_node_seq_record_level = level;
+    if (level == 1) {
+        sync = false;
+    }
     auto cb = [&](const DeviceTensorND& dv) {
         host_y.copy_from(dv);
         if (sync) {
@@ -428,8 +479,8 @@ void run<cb_non_contig>(CompNode cn) {
     if (level == 2) {
         ComputingGraph::assert_destroy(graph);
     }
-    for (int i = 0; i < 3; ++i) {
-        host_x->copy_from(*gen(host_x->shape()));
+    for (int k = 0; k < 3; ++k) {
+        host_x->copy_from(*gen(host_x->shape(), cn));
         HostTensorND expect{host_x->comp_node(), {5, 4}};
         auto px = host_x->ptr<float>(), py = expect.ptr<float>();
         for (int i = 0; i < 5; ++i) {
@@ -504,14 +555,16 @@ void run<multi_recorder_run>(CompNode cn) {
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Convolution::make(x, y, param);
         graph->options().comp_node_seq_record_level = 1;
-        return graph->compile({make_callback_copy(z, host_z_v[graph_id])});
+        return graph->compile({make_callback_copy(z, host_z_v[graph_id], false)});
     };
     funcs.push_back(gen_graph(0));
     funcs.push_back(gen_graph(1));
     for (int iter = 0; iter < 10; ++iter) {
         host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
         funcs[0]->execute();
+        funcs[0]->wait();
         funcs[1]->execute();
+        funcs[1]->wait();
         auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[0], 1e-3) << "iter " << iter;
         MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[1], 1e-3) << "iter " << iter;
src/core/test/graph/misc.cpp
@@ -1375,13 +1375,17 @@ TEST(TestGraph, CompNodeFinalize) {
         graph->options().var_sanity_check_first_run = false;
         graph->options().comp_node_seq_record_level = rec;
     }
-    auto func = graph->compile({make_callback_copy(z, host_z)});
+    auto sync = (rec != 1);
+    auto func = graph->compile({make_callback_copy(z, host_z, sync)});
     if (rec == 2) {
         ComputingGraph::assert_destroy(graph);
     }
     for (int i = 0; i < 5; ++i) {
         host_x->copy_from(*gen({1}, cn));
         func->execute();
+        if (!sync) {
+            func->wait();
+        }
         MGB_ASSERT_FLOAT_EQ(
                 host_x->ptr<float>()[0] + host_y->ptr<float>()[0],
                 host_z.ptr<float>()[0]);
@@ -1933,6 +1937,7 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
 #endif
     graph->options().graph_opt.weight_preprocess = true;
     graph->options().comp_node_seq_record_level = record_level;
+    auto sync = (record_level != 1);
     auto mkvar = [&](const char* name, const TensorShape& shp) {
         return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
     };
@@ -1970,11 +1975,17 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
     });
     HostTensorND host_y;
-    auto func = graph->compile({make_callback_copy(y, host_y)});
+    auto func = graph->compile({make_callback_copy(y, host_y, sync)});
     //! flag the no need memory of var
     func->execute();
+    if (!sync) {
+        func->wait();
+    }
     //! free the no need memory of var
     func->execute();
+    if (!sync) {
+        func->wait();
+    }
     auto check = [&](SymbolVar v) {
         ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED));
         ASSERT_TRUE(v.node()->dev_tensor().empty());
src/core/test/graph/multi_thread.cpp
@@ -213,9 +213,13 @@ TEST(TestGraph, MultiThreadRecorder) {
         z = opr::Convolution::make(x, y, param);
         graph->options().comp_node_seq_record_level = record_level;
         graph->options().var_sanity_check_first_run = false;
-        auto func = graph->compile({make_callback_copy(z, host_z)});
+        auto sync = (record_level != 1);
+        auto func = graph->compile({make_callback_copy(z, host_z, sync)});
         for (int i = 0; i < 5; i++) {
             func->execute();
+            if (!sync) {
+                func->wait();
+            }
         }
         auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3);
src/plugin/test/opr_io_dump.cpp
@@ -50,6 +50,7 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
     graph->options().var_sanity_check_first_run = false;
     graph->options().comp_node_seq_record_level = record;
     graph->options().graph_opt_level = 0;
+    auto sync = (record != 1);
     auto plug = plugin_maker(graph.get(), record);
     // make a non-contiguous value, also introduce some shape dependencies
@@ -76,11 +77,14 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
     cg::DepOprIter{cb_rename}.add(y);
     HostTensorND host_y;
-    auto func = graph->compile({make_callback_copy(y, host_y)});
+    auto func = graph->compile({make_callback_copy(y, host_y, sync)});
     if (record == 2) {
         ComputingGraph::assert_destroy(graph);
     }
     func->execute();
+    if (!sync) {
+        func->wait();
+    }
     plug->flush_lazy();
     MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
@@ -91,10 +95,16 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
         *host_x = *gen(host_x->shape(), cn);
     }
     func->execute();
+    if (!sync) {
+        func->wait();
+    }
     MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
     for (int i = 0; i < 2; ++i) {
         host_x->copy_from(*gen(host_x->shape(), cn));
         func->execute();
+        if (!sync) {
+            func->wait();
+        }
         MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
     }