MegEngine 天元 / MegEngine

Commit 8b764934
Authored Nov 01, 2021 by Megvii Engine Team

feat(lite): lite support output var no copy option

GitOrigin-RevId: 5b9488cb93fecb70f0ca0018edde29a6039f5510
Parent: 7642f66d
Showing 15 changed files with 214 additions and 21 deletions (+214, -21).
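In short: with the new force_output_use_user_specified_memory option, the caller binds an output buffer to the network before running it, and the graph writes the result into that buffer directly, skipping the final output copy. A minimal usage sketch, mirroring the new OutputNoCopy test below; the model path, input name, and the {1, 1000} float output layout are illustrative assumptions, not part of this commit:

#include <memory>
#include <vector>
#include "lite/network.h"

using namespace lite;

int main() {
    Config config;
    // Added by this commit: outputs go straight into user-provided memory.
    config.options.force_output_use_user_specified_memory = true;

    auto network = std::make_shared<Network>(config);
    network->load_model("./shufflenet.mge");  // illustrative model path

    // Feed the input as usual ("data" is an assumed input name).
    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor("data");
    // ... fill input_tensor->get_memory_ptr() with input data ...

    // Bind a user-owned buffer to the output before running; its layout
    // must match the statically inferred output shape.
    std::vector<float> buffer(1000);
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    output_tensor->reset(
            buffer.data(), Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});

    network->forward();
    network->wait();
    // The result is now in `buffer`; no extra device-to-user copy happened.
    return 0;
}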
lite/include/lite/network.h                    +1   -0
lite/lite-c/include/lite-c/network_c.h         +1   -0
lite/lite-c/src/network.cpp                    +4   -1
lite/pylite/megenginelite/network.py           +3   -0
lite/src/mge/network_impl.cpp                  +57  -7
lite/src/mge/network_impl.h                    +6   -2
lite/src/mge/tensor_impl.cpp                   +37  -0
lite/src/mge/tensor_impl.h                     +13  -0
lite/src/network.cpp                           +4   -0
lite/test/test_network.cpp                     +67  -0
src/core/impl/graph/cg_impl.cpp                +3   -4
src/core/impl/graph/var_node.cpp               +11  -4
src/core/include/megbrain/graph/cg.h           +1   -1
src/core/include/megbrain/graph/var_node.h     +4   -0
src/gopt/test/no_memory_copy.cpp               +2   -2
lite/include/lite/network.h
@@ -93,6 +93,7 @@ struct LITE_API Options {
     bool const_shape = false;
     bool force_dynamic_alloc = false;
     bool force_output_dynamic_alloc = false;
+    bool force_output_use_user_specified_memory = false;
     bool no_profiling_on_shape_change = false;
     uint8_t jit_level = 0;
     uint8_t comp_node_seq_record_level = 0;
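For contrast with the pre-existing flag on the previous line: force_output_dynamic_alloc lets the graph allocate output storage at runtime, and with this commit the Lite output tensor picks that storage up lazily through a get-memory callback instead of copying it. A hedged sketch of that read path; the helper name and model path are illustrative:

#include <memory>
#include <string>
#include "lite/network.h"

using namespace lite;

// Hypothetical helper: run one inference and return the output pointer.
void* run_with_dynamic_output(const std::string& model_path) {
    Config config;
    config.options.force_output_dynamic_alloc = true;  // graph allocates outputs

    auto network = std::make_shared<Network>(config);
    network->load_model(model_path);
    network->forward();
    network->wait();

    // get_memory_ptr() fires the get-memory callback installed by
    // output_tensor_copy_optimize() (added below in network_impl.cpp),
    // which proxies the graph's freshly allocated output storage into
    // this tensor without a copy.
    auto output_tensor = network->get_output_tensor(0);
    return output_tensor->get_memory_ptr();
}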
lite/lite-c/include/lite-c/network_c.h
@@ -83,6 +83,7 @@ typedef struct Options {
     int const_shape;
     int force_dynamic_alloc;
     int force_output_dynamic_alloc;
+    int force_output_use_user_specified_memory;
     int no_profiling_on_shape_change;
     int jit_level;
     int comp_node_seq_record_level;
lite/lite-c/src/network.cpp
@@ -29,6 +29,7 @@ const LiteOptions default_option = {
         .const_shape = false,
         .force_dynamic_alloc = false,
         .force_output_dynamic_alloc = false,
+        .force_output_use_user_specified_memory = false,
         .no_profiling_on_shape_change = false,
         .jit_level = 0,
         .comp_node_seq_record_level = 0,
@@ -122,7 +123,9 @@ lite::Config convert_to_lite_config(const LiteConfig c_config) {
     lite_config.options.var_sanity_check_first_run =
             c_config.options.var_sanity_check_first_run;
     lite_config.options.const_shape = c_config.options.const_shape;
-    lite_config.options.force_dynamic_alloc = c_config.options.const_shape;
+    lite_config.options.force_dynamic_alloc = c_config.options.force_dynamic_alloc;
+    lite_config.options.force_output_use_user_specified_memory =
+            c_config.options.force_output_use_user_specified_memory;
     lite_config.options.force_output_dynamic_alloc =
             c_config.options.force_output_dynamic_alloc;
     lite_config.options.no_profiling_on_shape_change =
lite/pylite/megenginelite/network.py
@@ -29,6 +29,7 @@ class LiteOptions(Structure):
         ("const_shape", c_int),
         ("force_dynamic_alloc", c_int),
         ("force_output_dynamic_alloc", c_int),
+        ("force_output_use_user_specified_memory", c_int),
         ("no_profiling_on_shape_change", c_int),
         ("jit_level", c_int),
         ("comp_node_seq_record_level", c_int),
@@ -52,6 +53,7 @@ class LiteOptions(Structure):
         self.const_shape = False
         self.force_dynamic_alloc = False
         self.force_output_dynamic_alloc = False
+        self.force_output_use_user_specified_memory = False
         self.no_profiling_on_shape_change = False
         self.jit_level = 0
         self.comp_node_seq_record_level = 0
@@ -67,6 +69,7 @@ class LiteOptions(Structure):
             "const_shape": bool(self.const_shape),
             "force_dynamic_alloc": bool(self.force_dynamic_alloc),
             "force_output_dynamic_alloc": bool(self.force_output_dynamic_alloc),
+            "force_output_nocopy": bool(self.force_output_nocopy),
             "no_profiling_on_shape_change": bool(self.no_profiling_on_shape_change),
             "jit_level": self.jit_level,
             "comp_node_seq_record_level": self.comp_node_seq_record_level,
lite/src/mge/network_impl.cpp
@@ -84,6 +84,9 @@ void NetworkImplDft::application_config() {
     m_load_config.const_var_shape = m_user_config->options.const_shape;
     ConfigOption(force_dynamic_alloc, force_dynamic_alloc);
     ConfigOption(force_output_dynamic_alloc, force_output_dynamic_alloc);
+    ConfigOption(
+            force_output_use_user_specified_memory,
+            force_output_use_user_specified_memory);
     ConfigOption(no_profiling_on_shape_change, no_profiling_on_shape_change);
     LITE_ASSERT(
             m_user_config->options.jit_level == 0 ||
@@ -250,7 +253,13 @@ void NetworkImplDft::make_output_spec() {
                 }
             }
         };
-        m_output_spec.emplace_back(load_out, std::move(cb));
+        //! if write to user-specified memory, the CallbackCaller must be nullptr.
+        if (m_user_config->options.force_output_use_user_specified_memory ||
+            m_user_config->options.force_output_dynamic_alloc) {
+            m_output_spec.emplace_back(load_out, nullptr);
+        } else {
+            m_output_spec.emplace_back(load_out, std::move(cb));
+        }
     } else {
         LITE_THROW(ssprintf("no output named : %s in the mode", out.name.c_str()));
     }
@@ -444,8 +453,7 @@ void NetworkImplDft::set_io(const NetworkIO& network_io) {
     }
 }
 
-void NetworkImplDft::try_infer_tensor_layout(
-        std::shared_ptr<Tensor> tensor, mgb::cg::SymbolVar var) {
+void NetworkImplDft::try_infer_tensor_layout(std::shared_ptr<Tensor> tensor, Var var) {
     auto&& static_infer_mgr = m_load_config.comp_graph->static_infer_manager();
     auto infer_trait = var.node()->get_static_infer_trait();
     if (std::get<0>(infer_trait)) {
@@ -455,9 +463,13 @@ void NetworkImplDft::try_infer_tensor_layout(
                     "Lite infer output shape failed, maybe the model is "
                     "dynamic "
                     "shape.\n");
+            LITE_ASSERT(
+                    !m_user_config->options.force_output_use_user_specified_memory,
+                    "force_output_use_user_specified_memory can't be used when output "
+                    "shape can't be derived.");
             return;
         }
-        Layout layout = to_lite_layout(mgb::TensorLayout{*shape, var.dtype()});
+        Layout layout = to_lite_layout(TensorLayout{*shape, var.dtype()});
         tensor->set_layout(layout);
     }
 }
@@ -559,8 +571,7 @@ void NetworkImplDft::update_output() {
          out_it != m_network_io->outputs.end();) {
         if (std::find_if(
                     m_load_result.output_var_list.begin(),
-                    m_load_result.output_var_list.end(),
-                    [out_it](const mgb::SymbolVar var) {
+                    m_load_result.output_var_list.end(), [out_it](const SymbolVar var) {
                         return var.node()->name() == out_it->name;
                     }) == m_load_result.output_var_list.end()) {
             LITE_LOG("%s is not the network output, ignore it.", out_it->name.c_str());
@@ -584,7 +595,7 @@ void NetworkImplDft::update_output() {
                 out_it->lite_tensor =
                         std::make_shared<Tensor>(device_id, stream_id, device_type);
             }
-            mgb::SymbolVar var;
+            SymbolVar var;
             for (auto&& out_var : m_load_result.output_var_list) {
                 if (out_var.node()->name() == out_it->name) {
                     var = out_var;
@@ -592,10 +603,12 @@ void NetworkImplDft::update_output() {
                 }
             }
             try_infer_tensor_layout(out_it->lite_tensor, var);
+            output_tensor_copy_optimize(var, out_it->lite_tensor);
         }
         //! user not set, use default output
     } else {
         for (auto&& out : m_load_result.output_var_list) {
             std::shared_ptr<Tensor> lite_tensor = nullptr;
             auto it = std::find_if(
                     m_network_io->outputs.begin(), m_network_io->outputs.end(),
                     [&out](const IOInner io) { return io.name == out.node()->name(); });
@@ -608,6 +621,7 @@ void NetworkImplDft::update_output() {
                             std::make_shared<Tensor>(device_id, stream_id, device_type);
                 }
                 try_infer_tensor_layout(it->lite_tensor, out);
                 lite_tensor = it->lite_tensor;
             } else {
                 IOInner output;
                 output.name = out.node()->name();
@@ -615,8 +629,44 @@ void NetworkImplDft::update_output() {
                         device_id, stream_id, device_type, true);
                 m_network_io->outputs.push_back({output});
                 try_infer_tensor_layout(output.lite_tensor, out);
                 lite_tensor = output.lite_tensor;
             }
+            output_tensor_copy_optimize(out, lite_tensor);
         }
     }
 }
 
+void NetworkImplDft::output_tensor_copy_optimize(
+        Var var, std::shared_ptr<Tensor> tensor) {
+    LITE_ASSERT(
+            !(m_user_config->options.force_output_use_user_specified_memory &&
+              m_user_config->options.force_output_dynamic_alloc),
+            "Can't set force_output_use_user_specified_memory and "
+            "force_output_dynamic_alloc at the same time.");
+    if (m_user_config->options.force_output_use_user_specified_memory) {
+        TensorHelper::implement(tensor)
+                ->cast_final_safe<TensorImplDft>()
+                .set_reset_callback([var](TensorImplDft* dft_tensor) {
+                    dft_tensor->device_share_host_memory();
+                    auto dv = dft_tensor->dev_tensor().get();
+                    dv->comp_node(var.node()->comp_node(), true);
+                    var.node()->init_mem_plan(dv);
+                    var.node()->reset_dev_tensor_from_tensor(*dv);
+                });
+    }
+    if (m_user_config->options.force_output_dynamic_alloc) {
+        TensorHelper::implement(tensor)
+                ->cast_final_safe<TensorImplDft>()
+                .set_get_memory_callback([var](TensorImplDft* dft_tensor) {
+                    if (dft_tensor->is_host()) {
+                        auto host_tensor = dft_tensor->m_host_tensor;
+                        *host_tensor =
+                                HostTensorND::make_proxy(var.node()->dev_tensor());
+                    } else {
+                        auto dev_tensor = dft_tensor->m_dev_tensor;
+                        *dev_tensor = var.node()->dev_tensor();
+                    }
+                });
+    }
+}
lite/src/mge/network_impl.h
@@ -12,6 +12,7 @@
 #pragma once
 
 #include "lite_build_config.h"
+#include "megbrain/graph.h"
 
 #if LITE_BUILD_WITH_MGE
 #include "lite/network.h"
@@ -41,6 +42,7 @@ class NetworkImplDft final : public Network::NetworkImplBase {
 public:
     NetworkImplDft() { m_load_config.comp_graph = mgb::ComputingGraph::make(); }
     using S = megdnn::param::ExecutionPolicy::Strategy;
+    using Var = mgb::cg::SymbolVar;
     //! set the config of the network, include:
     //! the inference device
     //! the other inference options, such as record_level, weight_preprocess...
@@ -207,8 +209,10 @@ private:
     void compile_graph();
 
     //! try to infer output tensor layout
-    void try_infer_tensor_layout(
-            std::shared_ptr<Tensor> tensor, mgb::cg::SymbolVar var);
+    void try_infer_tensor_layout(std::shared_ptr<Tensor> tensor, Var var);
+
+    //! optimized output tensor copy
+    void output_tensor_copy_optimize(Var var, std::shared_ptr<Tensor> tensor);
 
 private:
     bool m_async = false;
lite/src/mge/tensor_impl.cpp
@@ -149,6 +149,9 @@ Layout TensorImplDft::get_layout() const {
 }
 
 void* TensorImplDft::get_memory_ptr() const {
+    if (m_get_memory_callback) {
+        m_get_memory_callback(const_cast<TensorImplDft*>(this));
+    }
     if (is_host()) {
         return static_cast<void*>(m_host_tensor->raw_ptr());
     } else {
@@ -157,6 +160,9 @@ void* TensorImplDft::get_memory_ptr() const {
 }
 
 void* TensorImplDft::get_memory_ptr(const std::vector<size_t>& idx) const {
+    if (m_get_memory_callback) {
+        m_get_memory_callback(const_cast<TensorImplDft*>(this));
+    }
     if (is_host()) {
         auto elemsize_log = m_host_tensor->layout().dtype.size_log();
         switch (elemsize_log) {
@@ -317,6 +323,9 @@ void TensorImplDft::reset(void* prepared_data) {
         storage.reset(cn, size, raw_storage);
         m_dev_tensor->reset(storage, mge_layout);
     }
+    if (m_reset_callback) {
+        m_reset_callback(this);
+    }
 }
 
 void TensorImplDft::reset(void* prepared_data, const Layout& layout) {
@@ -430,6 +439,34 @@ void TensorImplDft::copy_from_mge_tensor(const mgb::DeviceTensorND& dv) {
     }
 }
 
+void TensorImplDft::set_reset_callback(const std::function<void(TensorImplDft*)>& cb) {
+    m_reset_callback = cb;
+}
+
+void TensorImplDft::set_get_memory_callback(
+        const std::function<void(TensorImplDft*)>& cb) {
+    m_get_memory_callback = cb;
+}
+
+void TensorImplDft::device_share_host_memory() {
+    if (is_host()) {
+        if (!m_dev_tensor) {
+            m_dev_tensor = std::make_shared<mgb::DeviceTensorND>(
+                    m_host_tensor->comp_node(), m_host_tensor->layout());
+        }
+        if (m_host_tensor->raw_ptr() != m_dev_tensor->raw_ptr()) {
+            auto raw_storage = std::shared_ptr<mgb::dt_byte>(
+                    m_host_tensor->raw_ptr(), [](void*) {});
+            auto cn = m_host_tensor->comp_node();
+            auto mge_layout = m_host_tensor->layout();
+            size_t size = mge_layout.span().dist_byte();
+            mgb::DeviceTensorStorage storage;
+            storage.reset(cn, size, raw_storage);
+            m_dev_tensor->reset(storage, mge_layout);
+        }
+    }
+}
+
 #endif
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
lite/src/mge/tensor_impl.h
@@ -97,11 +97,22 @@ public:
     //! get host tensor
     std::shared_ptr<mgb::HostTensorND> host_tensor() const { return m_host_tensor; }
 
     //! get device tensor
     std::shared_ptr<mgb::DeviceTensorND> dev_tensor() const { return m_dev_tensor; }
 
     //! copy from mgb tensor
     void copy_from_mge_tensor(const mgb::DeviceTensorND& dv);
 
+    //! set tensor reset callback
+    void set_reset_callback(const std::function<void(TensorImplDft*)>& cb);
+
+    //! set tensor get memory callback
+    void set_get_memory_callback(const std::function<void(TensorImplDft*)>& cb);
+
+    //! shared the same memory with host and device tensor
+    void device_share_host_memory();
+
 public:
     friend class NetworkImplDft;
@@ -115,6 +126,8 @@ private:
     void set_mge_tensor_compnode(const mgb::CompNode& comp_node);
 
 private:
+    std::function<void(TensorImplDft*)> m_get_memory_callback;
+    std::function<void(TensorImplDft*)> m_reset_callback;
     std::shared_ptr<mgb::HostTensorND> m_host_tensor;
     std::shared_ptr<mgb::DeviceTensorND> m_dev_tensor;
 };
lite/src/network.cpp
@@ -153,6 +153,10 @@ std::shared_ptr<Tensor> Network::get_output_tensor(size_t index) {
 
 Network& Network::set_async_callback(const AsyncCallback& callback) {
     LITE_ERROR_HANDLER_BEGIN
+    LITE_ASSERT(
+            !m_config.options.force_output_use_user_specified_memory,
+            "Async mode can't run with force_output_use_user_specified_memory which "
+            "output data is written to use specific memory.");
     LITE_CHECK_NON_NULL_POINTER(m_impl);
     m_impl->set_async_callback(std::move(callback));
     return *this;
lite/test/test_network.cpp
@@ -397,6 +397,73 @@ TEST(TestNetWork, ResetOutput) {
     compare_lite_tensor<float>(output_tensor, result_mgb);
 }
 
+TEST(TestNetWork, OutputNoCopy) {
+    Config config;
+    config.options.force_output_use_user_specified_memory = true;
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
+
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+    network->load_model(model_path);
+
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+
+    size_t times = 5;
+    std::vector<std::shared_ptr<Tensor>> result_tensors;
+    for (size_t i = 0; i < times; i++) {
+        auto tmp = std::make_shared<Tensor>(
+                LiteDeviceType::LITE_CPU,
+                Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
+        result_tensors.push_back(tmp);
+    }
+
+    for (size_t i = 0; i < times; i++) {
+        void* out_data = result_tensors[i]->get_memory_ptr();
+        output_tensor->reset(out_data, result_tensors[i]->get_layout());
+
+        network->forward();
+        network->wait();
+        ASSERT_EQ(output_tensor->get_memory_ptr(), out_data);
+        compare_lite_tensor<float>(output_tensor, result_mgb);
+    }
+    for (size_t i = 0; i < times; i++) {
+        compare_lite_tensor<float>(result_tensors[i], result_mgb);
+    }
+}
+
+TEST(TestNetWork, OutputDynamicAlloc) {
+    Config config;
+    config.options.force_output_dynamic_alloc = true;
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
+
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+    network->load_model(model_path);
+
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+
+    size_t times = 5;
+    for (size_t i = 0; i < times; i++) {
+        network->forward();
+        network->wait();
+        compare_lite_tensor<float>(output_tensor, result_mgb);
+    }
+}
+
 TEST(TestNetWork, AsyncExec) {
     Config config;
     config.options.var_sanity_check_first_run = false;
src/core/impl/graph/cg_impl.cpp
@@ -507,13 +507,12 @@ void ComputingGraphImpl::dest_var_optimize(VarNodeArray& dest_vars) {
             i->add_flag(F::NO_MEM_RECLAIM);
         }
     }
-    if (dest_vars[0]->owner_graph()->options().force_output_write_to_user_memory) {
+    if (dest_vars[0]->owner_graph()->options().force_output_use_user_specified_memory) {
        for (auto&& i : dest_vars) {
             mgb_assert(
                     !i->contain_flag(F::RT_FORCE_DYNAMIC_MEM_ALLOC),
-                    "var %s with force dynamic allocate should be set to write output "
-                    "to "
-                    "user memory",
+                    "var %s with RT_FORCE_DYNAMIC_MEM_ALLOC flag should not set "
+                    "force write output to user memory",
                     i->cname());
             i->add_flag(
                     F::NO_SYS_MEM_ALLOC | F::NO_SYS_STATIC_MEM_ALLOC |
src/core/impl/graph/var_node.cpp
@@ -574,6 +574,10 @@ MemAllocPlan& VarNode::init_mem_plan(const DeviceTensorND* fixed_alloc) {
     return m_mem_plan;
 }
 
+bool VarNode::is_graph_dest_varnode() {
+    return ComputingGraphImpl::downcast(owner_graph())->var_receiver(this).size() == 0;
+}
+
 VarNode& VarNode::add_flag(Flag flag) {
     modify_flag(flag, m_flag | flag);
     return *this;
@@ -582,10 +586,13 @@ VarNode& VarNode::add_flag(Flag flag) {
 void VarNode::modify_flag(Flag delta, Flag new_flag) {
     if (contain_flag(Flag::FLAG_FREEZED)) {
         mgb_assert(
-                (delta & (Flag::NO_SYS_MEM_ALLOC | Flag::NO_MEM_RECLAIM |
-                          Flag::NO_SYS_STATIC_MEM_ALLOC |
-                          Flag::RT_FORCE_DYNAMIC_MEM_ALLOC)) == delta ||
-                        (new_flag & Flag::MEMORY_NO_NEED));
+                (delta & (Flag::NO_MEM_RECLAIM | Flag::NO_SYS_STATIC_MEM_ALLOC |
+                          Flag::RT_FORCE_DYNAMIC_MEM_ALLOC | Flag::MEMORY_NO_NEED)) ==
+                                delta ||
+                        is_graph_dest_varnode(),
+                "After the FLAG_FREEZED flag setting, var can only modify "
+                "NO_MEM_RECLAIM, NO_SYS_STATIC_MEM_ALLOC, RT_FORCE_DYNAMIC_MEM_ALLOC, "
+                "MEMORY_NO_NEED flag except graph dest var.");
 
         mgb_assert(
                 !ComputingGraphImpl::downcast(owner_graph())
src/core/include/megbrain/graph/cg.h
@@ -421,7 +421,7 @@ public:
          * Force the output to be written to the user specified memory, which
          * can optimize the copy of output data at one time
          */
-        bool force_output_write_to_user_memory = false;
+        bool force_output_use_user_specified_memory = false;
 
         //! whether to perform var sanity check on first run
        bool var_sanity_check_first_run = true;
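At the graph level this is the same switch, renamed by this commit; embedders that build a ComputingGraph directly can set it before compiling, as the updated no_memory_copy tests do. A minimal sketch, assuming dest vars whose shapes are statically derivable:

#include "megbrain/graph.h"

void make_graph_with_user_output_memory() {
    auto graph = mgb::ComputingGraph::make();
    // Renamed from force_output_write_to_user_memory in this commit;
    // dest_var_optimize() will then tag the graph's dest vars with
    // NO_SYS_MEM_ALLOC / NO_SYS_STATIC_MEM_ALLOC (among other flags)
    // so their storage comes from the user-bound output tensors.
    graph->options().force_output_use_user_specified_memory = true;
}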
src/core/include/megbrain/graph/var_node.h
@@ -549,6 +549,10 @@ private:
     MGE_WIN_DECLSPEC_FUC void modify_flag(Flag delta, Flag new_flag);
 
+    //! whether the var is graph output, if it is output, the Flag of
+    //! NO_SYS_MEM_ALLOC can be modified.
+    bool is_graph_dest_varnode();
+
     MGE_WIN_DECLSPEC_FUC void assign_dev_tensor_from_tensor(
             const DeviceTensorND& value);
src/gopt/test/no_memory_copy.cpp
@@ -82,7 +82,7 @@ TEST(TestNoCopy, BasicInputNoCopy) {
 TEST(TestNoCopy, IONoCopyPtrEQ) {
     auto test_graph = TestGraph();
     auto compute_graph = test_graph.m_network->graph;
-    compute_graph->options().force_output_write_to_user_memory = true;
+    compute_graph->options().force_output_use_user_specified_memory = true;
     test_graph.create_graph();
     auto func = test_graph.compile_without_copy();
     auto&& outvar = func->get_output_vars()[0];
@@ -123,7 +123,7 @@ TEST(TestNoCopy, IONoCopyPtrEQ) {
 TEST(TestNoCopy, IONoCopyCorrect) {
     auto test_graph = TestGraph();
     auto compute_graph = test_graph.m_network->graph;
-    compute_graph->options().force_output_write_to_user_memory = true;
+    compute_graph->options().force_output_use_user_specified_memory = true;
     test_graph.create_graph();
     HostTensorND truth;
     auto func = test_graph.compile_without_copy();