Commit 8b764934
Authored 3 years ago by Megvii Engine Team

feat(lite): lite support output var no copy option
GitOrigin-RevId: 5b9488cb93fecb70f0ca0018edde29a6039f5510
Parent: 7642f66d
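
In short: the commit threads a new `force_output_use_user_specified_memory` option through Lite (the C++ `Options` struct, the C struct, and the Python ctypes mirror) so that graph outputs are written directly into caller-provided buffers, skipping the final output copy; the companion `force_output_dynamic_alloc` path is wired through the same callback mechanism. A minimal usage sketch, modeled on the `OutputNoCopy` test added below — the model path, input name, and the `{1, 1000}` output layout are placeholders taken from that test, not universal values:

#include "lite/network.h"
#include "lite/tensor.h"

#include <memory>
#include <vector>

int main() {
    lite::Config config;
    // Ask Lite to write network outputs straight into user-provided memory.
    config.options.force_output_use_user_specified_memory = true;

    auto network = std::make_shared<lite::Network>(config);
    network->load_model("./shufflenet.mge");  // placeholder model path

    // Feed the input as usual.
    auto input = network->get_io_tensor("data");          // placeholder input name
    std::vector<float> input_buf(1 * 3 * 224 * 224);      // assumed input shape
    input->reset(input_buf.data(), input->get_layout());

    // Point the output tensor at a caller-owned buffer *before* forward();
    // with the new option the graph writes its result there directly.
    auto output = network->get_output_tensor(0);
    std::vector<float> output_buf(1000);                  // assumed output shape {1, 1000}
    output->reset(
            output_buf.data(),
            lite::Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});

    network->forward();
    network->wait();
    // output_buf now holds the result; no extra output copy was made.
    return 0;
}
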
No related merge requests.
Showing 15 changed files with 214 additions and 21 deletions (+214, −21):
lite/include/lite/network.h                    +1   −0
lite/lite-c/include/lite-c/network_c.h         +1   −0
lite/lite-c/src/network.cpp                    +4   −1
lite/pylite/megenginelite/network.py           +3   −0
lite/src/mge/network_impl.cpp                  +57  −7
lite/src/mge/network_impl.h                    +6   −2
lite/src/mge/tensor_impl.cpp                   +37  −0
lite/src/mge/tensor_impl.h                     +13  −0
lite/src/network.cpp                           +4   −0
lite/test/test_network.cpp                     +67  −0
src/core/impl/graph/cg_impl.cpp                +3   −4
src/core/impl/graph/var_node.cpp               +11  −4
src/core/include/megbrain/graph/cg.h           +1   −1
src/core/include/megbrain/graph/var_node.h     +4   −0
src/gopt/test/no_memory_copy.cpp               +2   −2
--- a/lite/include/lite/network.h
+++ b/lite/include/lite/network.h
@@ -93,6 +93,7 @@ struct LITE_API Options {
     bool const_shape = false;
     bool force_dynamic_alloc = false;
     bool force_output_dynamic_alloc = false;
+    bool force_output_use_user_specified_memory = false;
     bool no_profiling_on_shape_change = false;
     uint8_t jit_level = 0;
     uint8_t comp_node_seq_record_level = 0;

--- a/lite/lite-c/include/lite-c/network_c.h
+++ b/lite/lite-c/include/lite-c/network_c.h
@@ -83,6 +83,7 @@ typedef struct Options {
     int const_shape;
     int force_dynamic_alloc;
     int force_output_dynamic_alloc;
+    int force_output_use_user_specified_memory;
     int no_profiling_on_shape_change;
     int jit_level;
     int comp_node_seq_record_level;

--- a/lite/lite-c/src/network.cpp
+++ b/lite/lite-c/src/network.cpp
@@ -29,6 +29,7 @@ const LiteOptions default_option = {
         .const_shape = false,
         .force_dynamic_alloc = false,
         .force_output_dynamic_alloc = false,
+        .force_output_use_user_specified_memory = false,
         .no_profiling_on_shape_change = false,
         .jit_level = 0,
         .comp_node_seq_record_level = 0,
@@ -122,7 +123,9 @@ lite::Config convert_to_lite_config(const LiteConfig c_config) {
     lite_config.options.var_sanity_check_first_run =
             c_config.options.var_sanity_check_first_run;
     lite_config.options.const_shape = c_config.options.const_shape;
-    lite_config.options.force_dynamic_alloc = c_config.options.const_shape;
+    lite_config.options.force_dynamic_alloc = c_config.options.force_dynamic_alloc;
+    lite_config.options.force_output_use_user_specified_memory =
+            c_config.options.force_output_use_user_specified_memory;
     lite_config.options.force_output_dynamic_alloc =
             c_config.options.force_output_dynamic_alloc;
     lite_config.options.no_profiling_on_shape_change =

--- a/lite/pylite/megenginelite/network.py
+++ b/lite/pylite/megenginelite/network.py
@@ -29,6 +29,7 @@ class LiteOptions(Structure):
         ("const_shape", c_int),
         ("force_dynamic_alloc", c_int),
         ("force_output_dynamic_alloc", c_int),
+        ("force_output_use_user_specified_memory", c_int),
         ("no_profiling_on_shape_change", c_int),
         ("jit_level", c_int),
         ("comp_node_seq_record_level", c_int),
@@ -52,6 +53,7 @@ class LiteOptions(Structure):
         self.const_shape = False
         self.force_dynamic_alloc = False
         self.force_output_dynamic_alloc = False
+        self.force_output_use_user_specified_memory = False
         self.no_profiling_on_shape_change = False
         self.jit_level = 0
         self.comp_node_seq_record_level = 0
@@ -67,6 +69,7 @@ class LiteOptions(Structure):
             "const_shape": bool(self.const_shape),
             "force_dynamic_alloc": bool(self.force_dynamic_alloc),
             "force_output_dynamic_alloc": bool(self.force_output_dynamic_alloc),
+            "force_output_nocopy": bool(self.force_output_nocopy),
             "no_profiling_on_shape_change": bool(self.no_profiling_on_shape_change),
             "jit_level": self.jit_level,
             "comp_node_seq_record_level": self.comp_node_seq_record_level,

--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -84,6 +84,9 @@ void NetworkImplDft::application_config() {
     m_load_config.const_var_shape = m_user_config->options.const_shape;
     ConfigOption(force_dynamic_alloc, force_dynamic_alloc);
     ConfigOption(force_output_dynamic_alloc, force_output_dynamic_alloc);
+    ConfigOption(
+            force_output_use_user_specified_memory,
+            force_output_use_user_specified_memory);
     ConfigOption(no_profiling_on_shape_change, no_profiling_on_shape_change);
     LITE_ASSERT(
             m_user_config->options.jit_level == 0 ||
@@ -250,7 +253,13 @@ void NetworkImplDft::make_output_spec() {
                 }
             }
         };
-        m_output_spec.emplace_back(load_out, std::move(cb));
+        //! if write to user-specified memory, the CallbackCaller must be nullptr.
+        if (m_user_config->options.force_output_use_user_specified_memory ||
+            m_user_config->options.force_output_dynamic_alloc) {
+            m_output_spec.emplace_back(load_out, nullptr);
+        } else {
+            m_output_spec.emplace_back(load_out, std::move(cb));
+        }
     } else {
         LITE_THROW(ssprintf("no output named : %s in the mode", out.name.c_str()));
     }
@@ -444,8 +453,7 @@ void NetworkImplDft::set_io(const NetworkIO& network_io) {
     }
 }

-void NetworkImplDft::try_infer_tensor_layout(
-        std::shared_ptr<Tensor> tensor, mgb::cg::SymbolVar var) {
+void NetworkImplDft::try_infer_tensor_layout(std::shared_ptr<Tensor> tensor, Var var) {
     auto&& static_infer_mgr = m_load_config.comp_graph->static_infer_manager();
     auto infer_trait = var.node()->get_static_infer_trait();
     if (std::get<0>(infer_trait)) {
@@ -455,9 +463,13 @@ void NetworkImplDft::try_infer_tensor_layout(
                     "Lite infer output shape failed, maybe the model is "
                     "dynamic "
                     "shape.\n");
+            LITE_ASSERT(
+                    !m_user_config->options.force_output_use_user_specified_memory,
+                    "force_output_use_user_specified_memory can't be used when output "
+                    "shape can't be derived.");
             return;
         }
-        Layout layout = to_lite_layout(mgb::TensorLayout{*shape, var.dtype()});
+        Layout layout = to_lite_layout(TensorLayout{*shape, var.dtype()});
         tensor->set_layout(layout);
     }
 }
@@ -559,8 +571,7 @@ void NetworkImplDft::update_output() {
          out_it != m_network_io->outputs.end();) {
         if (std::find_if(
                     m_load_result.output_var_list.begin(),
-                    m_load_result.output_var_list.end(),
-                    [out_it](const mgb::SymbolVar var) {
+                    m_load_result.output_var_list.end(), [out_it](const SymbolVar var) {
                         return var.node()->name() == out_it->name;
                     }) == m_load_result.output_var_list.end()) {
             LITE_LOG("%s is not the network output, ignore it.", out_it->name.c_str());
@@ -584,7 +595,7 @@ void NetworkImplDft::update_output() {
             out_it->lite_tensor =
                     std::make_shared<Tensor>(device_id, stream_id, device_type);
         }
-        mgb::SymbolVar var;
+        SymbolVar var;
         for (auto&& out_var : m_load_result.output_var_list) {
             if (out_var.node()->name() == out_it->name) {
                 var = out_var;
@@ -592,10 +603,12 @@ void NetworkImplDft::update_output() {
             }
         }
         try_infer_tensor_layout(out_it->lite_tensor, var);
+        output_tensor_copy_optimize(var, out_it->lite_tensor);
     }
     //! user not set, use default output
 } else {
     for (auto&& out : m_load_result.output_var_list) {
+        std::shared_ptr<Tensor> lite_tensor = nullptr;
         auto it = std::find_if(
                 m_network_io->outputs.begin(), m_network_io->outputs.end(),
                 [&out](const IOInner io) { return io.name == out.node()->name(); });
@@ -608,6 +621,7 @@ void NetworkImplDft::update_output() {
                         std::make_shared<Tensor>(device_id, stream_id, device_type);
             }
             try_infer_tensor_layout(it->lite_tensor, out);
+            lite_tensor = it->lite_tensor;
         } else {
             IOInner output;
             output.name = out.node()->name();
@@ -615,11 +629,47 @@ void NetworkImplDft::update_output() {
                     device_id, stream_id, device_type, true);
             m_network_io->outputs.push_back({output});
             try_infer_tensor_layout(output.lite_tensor, out);
+            lite_tensor = output.lite_tensor;
         }
+        output_tensor_copy_optimize(out, lite_tensor);
     }
 }

+void NetworkImplDft::output_tensor_copy_optimize(
+        Var var, std::shared_ptr<Tensor> tensor) {
+    LITE_ASSERT(
+            !(m_user_config->options.force_output_use_user_specified_memory &&
+              m_user_config->options.force_output_dynamic_alloc),
+            "Can't set force_output_use_user_specified_memory and "
+            "force_output_dynamic_alloc at the same time.");
+    if (m_user_config->options.force_output_use_user_specified_memory) {
+        TensorHelper::implement(tensor)
+                ->cast_final_safe<TensorImplDft>()
+                .set_reset_callback([var](TensorImplDft* dft_tensor) {
+                    dft_tensor->device_share_host_memory();
+                    auto dv = dft_tensor->dev_tensor().get();
+                    dv->comp_node(var.node()->comp_node(), true);
+                    var.node()->init_mem_plan(dv);
+                    var.node()->reset_dev_tensor_from_tensor(*dv);
+                });
+    }
+    if (m_user_config->options.force_output_dynamic_alloc) {
+        TensorHelper::implement(tensor)
+                ->cast_final_safe<TensorImplDft>()
+                .set_get_memory_callback([var](TensorImplDft* dft_tensor) {
+                    if (dft_tensor->is_host()) {
+                        auto host_tensor = dft_tensor->m_host_tensor;
+                        *host_tensor =
+                                HostTensorND::make_proxy(var.node()->dev_tensor());
+                    } else {
+                        auto dev_tensor = dft_tensor->m_dev_tensor;
+                        *dev_tensor = var.node()->dev_tensor();
+                    }
+                });
+    }
+}
+
 std::shared_ptr<Tensor> NetworkImplDft::get_io_tensor(
         std::string io_name, LiteTensorPhase phase) {
     if (phase == LiteTensorPhase::LITE_INPUT || phase == LiteTensorPhase::LITE_IO) {

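The new `output_tensor_copy_optimize` above is the core of the feature: under `force_output_use_user_specified_memory` it installs a reset callback that re-points the graph's output var at whatever buffer the user hands to `Tensor::reset`, and under `force_output_dynamic_alloc` it installs a get-memory callback that lazily borrows the var's dynamically allocated storage. Stripped of MegEngine types, the hook pattern looks like this self-contained toy (class and member names here are invented for illustration, not the library's actual classes):

#include <cstdio>
#include <functional>

// A toy tensor with the two hooks NetworkImplDft installs on output tensors.
class ToyTensor {
public:
    void set_reset_callback(const std::function<void(ToyTensor*)>& cb) {
        m_reset_callback = cb;
    }
    void set_get_memory_callback(const std::function<void(ToyTensor*)>& cb) {
        m_get_memory_callback = cb;
    }
    // reset(): the user supplies memory; the hook lets the graph re-point its
    // output var at that buffer (the force_output_use_user_specified_memory path).
    void reset(void* ptr) {
        m_ptr = ptr;
        if (m_reset_callback)
            m_reset_callback(this);
    }
    // get_memory_ptr(): the hook lets the tensor lazily borrow the var's
    // dynamically allocated storage (the force_output_dynamic_alloc path).
    void* get_memory_ptr() {
        if (m_get_memory_callback)
            m_get_memory_callback(this);
        return m_ptr;
    }

    void* m_ptr = nullptr;

private:
    std::function<void(ToyTensor*)> m_reset_callback;
    std::function<void(ToyTensor*)> m_get_memory_callback;
};

int main() {
    ToyTensor out;
    out.set_reset_callback([](ToyTensor* t) {
        std::printf("graph output var re-pointed to %p\n", t->m_ptr);
    });
    float buf[4];
    out.reset(buf);  // fires the hook: the "graph" now writes into buf directly
    return 0;
}
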
--- a/lite/src/mge/network_impl.h
+++ b/lite/src/mge/network_impl.h
@@ -12,6 +12,7 @@
 #pragma once

 #include "lite_build_config.h"
+#include "megbrain/graph.h"

 #if LITE_BUILD_WITH_MGE
 #include "lite/network.h"
@@ -41,6 +42,7 @@ class NetworkImplDft final : public Network::NetworkImplBase {
 public:
     NetworkImplDft() { m_load_config.comp_graph = mgb::ComputingGraph::make(); }
     using S = megdnn::param::ExecutionPolicy::Strategy;
+    using Var = mgb::cg::SymbolVar;
     //! set the config of the network, include:
     //! the inference device
     //! the other inference options, such as record_level, weight_preprocess...
@@ -207,8 +209,10 @@ private:
     void compile_graph();

     //! try to infer output tensor layout
-    void try_infer_tensor_layout(
-            std::shared_ptr<Tensor> tensor, mgb::cg::SymbolVar var);
+    void try_infer_tensor_layout(std::shared_ptr<Tensor> tensor, Var var);
+
+    //! optimized output tensor copy
+    void output_tensor_copy_optimize(Var var, std::shared_ptr<Tensor> tensor);

 private:
     bool m_async = false;

--- a/lite/src/mge/tensor_impl.cpp
+++ b/lite/src/mge/tensor_impl.cpp
@@ -149,6 +149,9 @@ Layout TensorImplDft::get_layout() const {
 }

 void* TensorImplDft::get_memory_ptr() const {
+    if (m_get_memory_callback) {
+        m_get_memory_callback(const_cast<TensorImplDft*>(this));
+    }
     if (is_host()) {
         return static_cast<void*>(m_host_tensor->raw_ptr());
     } else {
@@ -157,6 +160,9 @@ void* TensorImplDft::get_memory_ptr() const {
 }

 void* TensorImplDft::get_memory_ptr(const std::vector<size_t>& idx) const {
+    if (m_get_memory_callback) {
+        m_get_memory_callback(const_cast<TensorImplDft*>(this));
+    }
     if (is_host()) {
         auto elemsize_log = m_host_tensor->layout().dtype.size_log();
         switch (elemsize_log) {
@@ -317,6 +323,9 @@ void TensorImplDft::reset(void* prepared_data) {
         storage.reset(cn, size, raw_storage);
         m_dev_tensor->reset(storage, mge_layout);
     }
+    if (m_reset_callback) {
+        m_reset_callback(this);
+    }
 }

 void TensorImplDft::reset(void* prepared_data, const Layout& layout) {
@@ -430,6 +439,34 @@ void TensorImplDft::copy_from_mge_tensor(const mgb::DeviceTensorND& dv) {
     }
 }

+void TensorImplDft::set_reset_callback(const std::function<void(TensorImplDft*)>& cb) {
+    m_reset_callback = cb;
+}
+
+void TensorImplDft::set_get_memory_callback(
+        const std::function<void(TensorImplDft*)>& cb) {
+    m_get_memory_callback = cb;
+}
+
+void TensorImplDft::device_share_host_memory() {
+    if (is_host()) {
+        if (!m_dev_tensor) {
+            m_dev_tensor = std::make_shared<mgb::DeviceTensorND>(
+                    m_host_tensor->comp_node(), m_host_tensor->layout());
+        }
+        if (m_host_tensor->raw_ptr() != m_dev_tensor->raw_ptr()) {
+            auto raw_storage = std::shared_ptr<mgb::dt_byte>(
+                    m_host_tensor->raw_ptr(), [](void*) {});
+            auto cn = m_host_tensor->comp_node();
+            auto mge_layout = m_host_tensor->layout();
+            size_t size = mge_layout.span().dist_byte();
+            mgb::DeviceTensorStorage storage;
+            storage.reset(cn, size, raw_storage);
+            m_dev_tensor->reset(storage, mge_layout);
+        }
+    }
+}
+
 #endif

 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

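The enabling idiom inside `device_share_host_memory` is a non-owning `std::shared_ptr`: the no-op deleter lets `DeviceTensorStorage` reference the host tensor's bytes without ever freeing them, so the device and host views alias the same memory. The idiom in isolation (a generic sketch, independent of MegEngine):

#include <cstdio>
#include <memory>

int main() {
    float buffer[4] = {1.f, 2.f, 3.f, 4.f};  // memory owned elsewhere (here: the stack)

    // Non-owning view: the lambda deleter does nothing, so destroying the
    // shared_ptr never frees buffer. This is how the lite device storage
    // borrows the host tensor's memory in device_share_host_memory().
    std::shared_ptr<float> view(buffer, [](float*) {});

    std::printf("%.1f\n", view.get()[2]);  // prints 3.0: same bytes, no copy
    return 0;
}
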
--- a/lite/src/mge/tensor_impl.h
+++ b/lite/src/mge/tensor_impl.h
@@ -97,11 +97,22 @@ public:
     //! get host tensor
     std::shared_ptr<mgb::HostTensorND> host_tensor() const { return m_host_tensor; }
     //! get device tensor
     std::shared_ptr<mgb::DeviceTensorND> dev_tensor() const { return m_dev_tensor; }

     //! copy from mgb tensor
     void copy_from_mge_tensor(const mgb::DeviceTensorND& dv);

+    //! set tensor reset callback
+    void set_reset_callback(const std::function<void(TensorImplDft*)>& cb);
+
+    //! set tensor get memory callback
+    void set_get_memory_callback(const std::function<void(TensorImplDft*)>& cb);
+
+    //! shared the same memory with host and device tensor
+    void device_share_host_memory();
+
 public:
     friend class NetworkImplDft;
@@ -115,6 +126,8 @@ private:
     void set_mge_tensor_compnode(const mgb::CompNode& comp_node);

 private:
+    std::function<void(TensorImplDft*)> m_get_memory_callback;
+    std::function<void(TensorImplDft*)> m_reset_callback;
     std::shared_ptr<mgb::HostTensorND> m_host_tensor;
     std::shared_ptr<mgb::DeviceTensorND> m_dev_tensor;
 };

--- a/lite/src/network.cpp
+++ b/lite/src/network.cpp
@@ -153,6 +153,10 @@ std::shared_ptr<Tensor> Network::get_output_tensor(size_t index) {
 Network& Network::set_async_callback(const AsyncCallback& callback) {
     LITE_ERROR_HANDLER_BEGIN
+    LITE_ASSERT(
+            !m_config.options.force_output_use_user_specified_memory,
+            "Async mode can't run with force_output_use_user_specified_memory which "
+            "output data is written to use specific memory.");
     LITE_CHECK_NON_NULL_POINTER(m_impl);
     m_impl->set_async_callback(std::move(callback));
     return *this;

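A user-visible consequence of the guard above: asynchronous execution and user-specified output memory are mutually exclusive, and the combination now fails fast at `set_async_callback`. A hypothetical snippet that trips the new assert (assuming `AsyncCallback` is a `std::function<void(void)>` as declared in lite/network.h):

#include "lite/network.h"

#include <memory>

int main() {
    lite::Config config;
    config.options.force_output_use_user_specified_memory = true;

    auto network = std::make_shared<lite::Network>(config);
    // The new LITE_ASSERT fires here: async mode can't be combined with
    // outputs that are written directly into user-specified memory.
    network->set_async_callback([]() { /* consume results */ });
    return 0;
}
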
--- a/lite/test/test_network.cpp
+++ b/lite/test/test_network.cpp
@@ -397,6 +397,73 @@ TEST(TestNetWork, ResetOutput) {
     compare_lite_tensor<float>(output_tensor, result_mgb);
 }

+TEST(TestNetWork, OutputNoCopy) {
+    Config config;
+    config.options.force_output_use_user_specified_memory = true;
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
+
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+
+    network->load_model(model_path);
+
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+
+    size_t times = 5;
+    std::vector<std::shared_ptr<Tensor>> result_tensors;
+    for (size_t i = 0; i < times; i++) {
+        auto tmp = std::make_shared<Tensor>(
+                LiteDeviceType::LITE_CPU,
+                Layout{{1, 1000}, 2, LiteDataType::LITE_FLOAT});
+        result_tensors.push_back(tmp);
+    }
+
+    for (size_t i = 0; i < times; i++) {
+        void* out_data = result_tensors[i]->get_memory_ptr();
+        output_tensor->reset(out_data, result_tensors[i]->get_layout());
+
+        network->forward();
+        network->wait();
+        ASSERT_EQ(output_tensor->get_memory_ptr(), out_data);
+        compare_lite_tensor<float>(output_tensor, result_mgb);
+    }
+    for (size_t i = 0; i < times; i++) {
+        compare_lite_tensor<float>(result_tensors[i], result_mgb);
+    }
+}
+
+TEST(TestNetWork, OutputDynamicAlloc) {
+    Config config;
+    config.options.force_output_dynamic_alloc = true;
+    auto tensor = get_input_data("./input_data.npy");
+    std::string model_path = "./shufflenet.mge";
+    std::string input_name = "data";
+    auto result_mgb = mgb_lar(model_path, config, input_name, tensor);
+
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+    network->load_model(model_path);
+
+    std::shared_ptr<Tensor> input_tensor = network->get_io_tensor(input_name);
+
+    auto src_ptr = tensor->get_memory_ptr();
+    auto src_layout = tensor->get_layout();
+    input_tensor->reset(src_ptr, src_layout);
+
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+
+    size_t times = 5;
+    for (size_t i = 0; i < times; i++) {
+        network->forward();
+        network->wait();
+        compare_lite_tensor<float>(output_tensor, result_mgb);
+    }
+}
+
 TEST(TestNetWork, AsyncExec) {
     Config config;
     config.options.var_sanity_check_first_run = false;

--- a/src/core/impl/graph/cg_impl.cpp
+++ b/src/core/impl/graph/cg_impl.cpp
@@ -507,13 +507,12 @@ void ComputingGraphImpl::dest_var_optimize(VarNodeArray& dest_vars) {
             i->add_flag(F::NO_MEM_RECLAIM);
         }
     }
-    if (dest_vars[0]->owner_graph()->options().force_output_write_to_user_memory) {
+    if (dest_vars[0]->owner_graph()->options().force_output_use_user_specified_memory) {
         for (auto&& i : dest_vars) {
             mgb_assert(
                     !i->contain_flag(F::RT_FORCE_DYNAMIC_MEM_ALLOC),
-                    "var %s with force dynamic allocate should be set to write output "
-                    "to "
-                    "user memory",
+                    "var %s with RT_FORCE_DYNAMIC_MEM_ALLOC flag should not set "
+                    "force write output to user memory",
                     i->cname());
             i->add_flag(
                     F::NO_SYS_MEM_ALLOC | F::NO_SYS_STATIC_MEM_ALLOC |

--- a/src/core/impl/graph/var_node.cpp
+++ b/src/core/impl/graph/var_node.cpp
@@ -574,6 +574,10 @@ MemAllocPlan& VarNode::init_mem_plan(const DeviceTensorND* fixed_alloc) {
     return m_mem_plan;
 }

+bool VarNode::is_graph_dest_varnode() {
+    return ComputingGraphImpl::downcast(owner_graph())->var_receiver(this).size() == 0;
+}
+
 VarNode& VarNode::add_flag(Flag flag) {
     modify_flag(flag, m_flag | flag);
     return *this;
@@ -582,10 +586,13 @@ VarNode& VarNode::add_flag(Flag flag) {
 void VarNode::modify_flag(Flag delta, Flag new_flag) {
     if (contain_flag(Flag::FLAG_FREEZED)) {
         mgb_assert(
-                (delta & (Flag::NO_SYS_MEM_ALLOC | Flag::NO_MEM_RECLAIM |
-                          Flag::NO_SYS_STATIC_MEM_ALLOC |
-                          Flag::RT_FORCE_DYNAMIC_MEM_ALLOC)) == delta ||
-                        (new_flag & Flag::MEMORY_NO_NEED));
+                (delta & (Flag::NO_MEM_RECLAIM | Flag::NO_SYS_STATIC_MEM_ALLOC |
+                          Flag::RT_FORCE_DYNAMIC_MEM_ALLOC | Flag::MEMORY_NO_NEED)) ==
+                                delta ||
+                        is_graph_dest_varnode(),
+                "After the FLAG_FREEZED flag setting, var can only modify "
+                "NO_MEM_RECLAIM, NO_SYS_STATIC_MEM_ALLOC, RT_FORCE_DYNAMIC_MEM_ALLOC, "
+                "MEMORY_NO_NEED flag except graph dest var.");

         mgb_assert(
                 !ComputingGraphImpl::downcast(owner_graph())

--- a/src/core/include/megbrain/graph/cg.h
+++ b/src/core/include/megbrain/graph/cg.h
@@ -421,7 +421,7 @@ public:
         /*!
          * Force the output to be written to the user specified memory, which
          * can optimize the copy of output data at one time
          */
-        bool force_output_write_to_user_memory = false;
+        bool force_output_use_user_specified_memory = false;

         //! whether to perform var sanity check on first run
         bool var_sanity_check_first_run = true;

--- a/src/core/include/megbrain/graph/var_node.h
+++ b/src/core/include/megbrain/graph/var_node.h
@@ -549,6 +549,10 @@ private:
     MGE_WIN_DECLSPEC_FUC void modify_flag(Flag delta, Flag new_flag);

+    //! whether the var is graph output, if it is output, the Flag of
+    //! NO_SYS_MEM_ALLOC can be modified.
+    bool is_graph_dest_varnode();
+
     MGE_WIN_DECLSPEC_FUC void assign_dev_tensor_from_tensor(
             const DeviceTensorND& value);

--- a/src/gopt/test/no_memory_copy.cpp
+++ b/src/gopt/test/no_memory_copy.cpp
@@ -82,7 +82,7 @@ TEST(TestNoCopy, BasicInputNoCopy) {
 TEST(TestNoCopy, IONoCopyPtrEQ) {
     auto test_graph = TestGraph();
     auto compute_graph = test_graph.m_network->graph;
-    compute_graph->options().force_output_write_to_user_memory = true;
+    compute_graph->options().force_output_use_user_specified_memory = true;
     test_graph.create_graph();
     auto func = test_graph.compile_without_copy();
     auto&& outvar = func->get_output_vars()[0];
@@ -123,7 +123,7 @@ TEST(TestNoCopy, IONoCopyPtrEQ) {
 TEST(TestNoCopy, IONoCopyCorrect) {
     auto test_graph = TestGraph();
     auto compute_graph = test_graph.m_network->graph;
-    compute_graph->options().force_output_write_to_user_memory = true;
+    compute_graph->options().force_output_use_user_specified_memory = true;
     test_graph.create_graph();
     HostTensorND truth;
     auto func = test_graph.compile_without_copy();