Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
d90cb776
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
410
Star
4707
Fork
583
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
d90cb776
编写于
11月 24, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(src/core): record support change ptr basic
GitOrigin-RevId: b12f1c4a6655aeb8aeb28d660a34d0b6d84b37cb
上级
7b19bc76
变更
21
隐藏空白更改
内联
并排
Showing
21 changed file
with
533 addition
and
49 deletion
+533
-49
dnn/include/megdnn/basic_types.h
dnn/include/megdnn/basic_types.h
+70
-5
dnn/src/common/basic_types.cpp
dnn/src/common/basic_types.cpp
+10
-0
imperative/src/impl/ops/tensor_manip.cpp
imperative/src/impl/ops/tensor_manip.cpp
+2
-2
imperative/src/impl/tensor_sanity_check.cpp
imperative/src/impl/tensor_sanity_check.cpp
+1
-1
lite/src/mge/network_impl.cpp
lite/src/mge/network_impl.cpp
+24
-2
lite/src/mge/tensor_impl.cpp
lite/src/mge/tensor_impl.cpp
+13
-10
lite/src/mge/tensor_impl.h
lite/src/mge/tensor_impl.h
+1
-0
lite/test/test_network.cpp
lite/test/test_network.cpp
+63
-1
src/core/impl/graph/cg_impl_seq.cpp
src/core/impl/graph/cg_impl_seq.cpp
+6
-1
src/core/impl/graph/var_node_mem_mgr.cpp
src/core/impl/graph/var_node_mem_mgr.cpp
+1
-1
src/core/impl/tensor.cpp
src/core/impl/tensor.cpp
+31
-3
src/core/include/megbrain/tensor.h
src/core/include/megbrain/tensor.h
+21
-3
src/core/test/comp_node.cpp
src/core/test/comp_node.cpp
+75
-0
src/gopt/test/no_memory_copy.cpp
src/gopt/test/no_memory_copy.cpp
+195
-4
src/jit/impl/nvrtc/compiler_cuda.cpp
src/jit/impl/nvrtc/compiler_cuda.cpp
+3
-3
src/opr/impl/basic_arith.cpp
src/opr/impl/basic_arith.cpp
+10
-6
src/opr/impl/cond.cpp
src/opr/impl/cond.cpp
+1
-1
src/opr/impl/misc.cpp
src/opr/impl/misc.cpp
+2
-2
src/opr/impl/tensor_manip.cpp
src/opr/impl/tensor_manip.cpp
+1
-1
src/opr/test/dnn/convolution.cpp
src/opr/test/dnn/convolution.cpp
+2
-2
src/plugin/impl/var_sanity_check.cpp
src/plugin/impl/var_sanity_check.cpp
+1
-1
未找到文件。
dnn/include/megdnn/basic_types.h
浏览文件 @
d90cb776
...
...
@@ -15,6 +15,8 @@
#include "megdnn/dtype.h"
#include "megdnn/internal/defs.h"
#include <memory>
#if MEGDNN_CC_HOST
#include <cstdarg>
#include <string>
...
...
@@ -402,31 +404,94 @@ struct TensorLayout : public TensorShape {
MGE_WIN_DECLSPEC_FUC
size_t
access_bytes
()
const
;
};
class
RefPtr
{
std
::
shared_ptr
<
void
*>
m_ref
;
size_t
m_offset
;
bool
m_mutable
;
public:
RefPtr
()
{
m_ref
=
std
::
make_shared
<
void
*>
((
void
*
)
nullptr
);
m_offset
=
0
;
m_mutable
=
true
;
}
RefPtr
(
void
*
ref_ptr
,
const
size_t
offset
=
0
)
{
m_ref
=
std
::
make_shared
<
void
*>
(
ref_ptr
);
m_offset
=
offset
;
m_mutable
=
true
;
}
explicit
RefPtr
(
std
::
shared_ptr
<
void
*>
ref_ptr
,
const
size_t
offset
=
0
,
bool
is_mutable
=
true
)
{
m_ref
=
ref_ptr
;
m_offset
=
offset
;
m_mutable
=
is_mutable
;
}
void
*
get_ptr
()
const
{
return
static_cast
<
void
*>
(
(
*
m_ref
!=
NULL
)
?
static_cast
<
dt_byte
*>
(
*
m_ref
)
+
m_offset
:
nullptr
);
}
bool
is_mutable
()
const
{
return
m_mutable
;
}
void
reset
(
const
void
*
ptr
,
size_t
offset
=
0
);
RefPtr
&
operator
+=
(
size_t
offset
)
{
m_offset
+=
offset
;
return
*
this
;
}
bool
operator
==
(
const
RefPtr
&
other
)
const
{
return
*
m_ref
==
*
other
.
m_ref
&&
m_offset
==
other
.
m_offset
;
}
template
<
typename
T
>
T
*
ptr
()
const
{
return
static_cast
<
T
*>
(
get_ptr
());
}
};
/**
* \brief A simple encapsulation class for n-dimensional tensor.
*/
struct
TensorND
{
void
*
raw_ptr
;
TensorLayout
layout
;
TensorND
()
:
raw_ptr
(
NULL
)
{}
TensorND
()
:
m_ref_ptr
(
RefPtr
((
void
*
)
nullptr
)
)
{}
TensorND
(
void
*
raw_ptr_
,
const
TensorLayout
&
layout_
)
:
raw_ptr
(
raw_ptr_
),
layout
(
layout_
)
{}
:
layout
(
layout_
),
m_ref_ptr
(
raw_ptr_
)
{}
TensorND
(
const
TensorLayout
&
layout_
,
const
RefPtr
&
ref_ptr
)
:
layout
(
layout_
),
m_ref_ptr
(
ref_ptr
)
{}
MGE_WIN_DECLSPEC_FUC
void
reset_ptr
(
void
*
ptr
,
size_t
offset
=
0
);
void
*
raw_ptr
()
const
{
return
m_ref_ptr
.
get_ptr
();
}
const
RefPtr
get_ref_ptr
()
const
{
return
m_ref_ptr
;
}
RefPtr
&
get_ref_ptr
()
{
return
m_ref_ptr
;
}
//! get typed pointer; type check is performed
template
<
typename
T
>
T
*
ptr
()
const
{
layout
.
dtype
.
assert_is_ctype
<
T
>
();
return
static_cast
<
T
*>
(
raw_ptr
);
return
static_cast
<
T
*>
(
m_ref_ptr
.
get_ptr
()
);
}
//! get typed pointer of compatible type
template
<
typename
T
>
T
*
compatible_ptr
()
const
{
layout
.
dtype
.
assert_is_compatible_ctype
<
T
>
();
return
reinterpret_cast
<
T
*>
(
raw_ptr
);
return
reinterpret_cast
<
T
*>
(
m_ref_ptr
.
get_ptr
()
);
}
private:
RefPtr
m_ref_ptr
;
};
#if MEGDNN_CC_HOST
...
...
dnn/src/common/basic_types.cpp
浏览文件 @
d90cb776
...
...
@@ -605,4 +605,14 @@ std::string TensorLayout::serialize() const {
return
rst
;
}
void
RefPtr
::
reset
(
const
void
*
ptr
,
size_t
offset
)
{
megdnn_assert
(
m_mutable
,
"this RefPtr can't change."
);
*
m_ref
=
const_cast
<
void
*>
(
ptr
);
m_offset
=
offset
;
}
void
TensorND
::
reset_ptr
(
void
*
ptr
,
size_t
offset
)
{
m_ref_ptr
.
reset
(
ptr
,
offset
);
}
// vim: syntax=cpp.doxygen
imperative/src/impl/ops/tensor_manip.cpp
浏览文件 @
d90cb776
...
...
@@ -342,7 +342,7 @@ void param_pack_concat_execute(
[
comp_node
](
dt_byte
*
ptr
)
{
comp_node
.
free_host
(
ptr
);
}};
TensorLayout
srcs_layout
=
TensorLayout
{{
nr_inputs
},
dtype
::
Int32
()};
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
srcs_raw_ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
;
srcs_raw_ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
;
}
HostTensorStorage
srcs_storage
;
srcs_storage
.
reset
(
comp_node
,
srcs_size
,
srcs_ptr
);
...
...
@@ -392,7 +392,7 @@ SmallVector<TensorPtr> param_pack_concat_apply_on_physical_tensor(
src_shapes
,
inputs
.
back
()
->
shape
(),
TensorShape
{});
}
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
srcs_raw_ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
;
srcs_raw_ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
;
}
HostTensorStorage
srcs_storage
;
srcs_storage
.
reset
(
comp_node
,
srcs_size
,
srcs_ptr
);
...
...
imperative/src/impl/tensor_sanity_check.cpp
浏览文件 @
d90cb776
...
...
@@ -26,7 +26,7 @@ TensorChecksumCalc::ChecksumResult TensorChecksumCalc::calc(TensorPtr ptr) {
auto
span
=
dt
.
layout
().
span
();
megdnn
::
TensorND
tensor
;
tensor
.
r
aw_ptr
=
dt
.
raw_ptr
()
+
span
.
low_byte
;
tensor
.
r
eset_ptr
(
dt
.
raw_ptr
()
+
span
.
low_byte
)
;
tensor
.
layout
.
init_contiguous_stride
({
span
.
dist_byte
()});
tensor
.
layout
.
dtype
=
dtype
::
Byte
();
...
...
lite/src/mge/network_impl.cpp
浏览文件 @
d90cb776
...
...
@@ -527,6 +527,10 @@ void NetworkImplDft::update_input() {
config_in
.
lite_tensor
->
set_layout
(
to_lite_layout
(
in_tensor_iter
.
second
->
layout
()));
}
TensorHelper
::
implement
(
config_in
.
lite_tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
m_record_reset
=
m_user_config
->
options
.
comp_node_seq_record_level
>
0
;
if
(
config_in
.
config_layout
.
ndim
&&
!
(
config_in
.
config_layout
==
config_in
.
lite_tensor
->
get_layout
()))
{
config_in
.
lite_tensor
->
set_layout
(
config_in
.
config_layout
);
...
...
@@ -541,6 +545,10 @@ void NetworkImplDft::update_input() {
TensorHelper
::
implement
(
io_in
.
lite_tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
m_host_tensor
=
in_tensor_iter
.
second
;
TensorHelper
::
implement
(
io_in
.
lite_tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
m_record_reset
=
m_user_config
->
options
.
comp_node_seq_record_level
>
0
;
io_in
.
lite_tensor
->
update_from_implement
();
m_network_io
->
inputs
.
push_back
(
io_in
);
}
...
...
@@ -603,6 +611,10 @@ void NetworkImplDft::update_output() {
}
try_infer_tensor_layout
(
out_it
->
lite_tensor
,
var
);
output_tensor_copy_optimize
(
var
,
out_it
->
lite_tensor
);
TensorHelper
::
implement
(
out_it
->
lite_tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
m_record_reset
=
m_user_config
->
options
.
comp_node_seq_record_level
>
0
;
}
//! user not set, use default output
}
else
{
...
...
@@ -631,6 +643,10 @@ void NetworkImplDft::update_output() {
lite_tensor
=
output
.
lite_tensor
;
}
output_tensor_copy_optimize
(
out
,
lite_tensor
);
TensorHelper
::
implement
(
lite_tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
m_record_reset
=
m_user_config
->
options
.
comp_node_seq_record_level
>
0
;
}
}
}
...
...
@@ -643,14 +659,20 @@ void NetworkImplDft::output_tensor_copy_optimize(
"Can't set force_output_use_user_specified_memory and "
"force_output_dynamic_alloc at the same time."
);
if
(
m_user_config
->
options
.
force_output_use_user_specified_memory
)
{
bool
in_record
=
m_user_config
->
options
.
comp_node_seq_record_level
>
0
;
TensorHelper
::
implement
(
tensor
)
->
cast_final_safe
<
TensorImplDft
>
()
.
set_reset_callback
([
var
](
TensorImplDft
*
dft_tensor
)
{
.
set_reset_callback
([
var
,
in_record
](
TensorImplDft
*
dft_tensor
)
{
dft_tensor
->
device_share_host_memory
();
auto
dv
=
dft_tensor
->
dev_tensor
().
get
();
dv
->
comp_node
(
var
.
node
()
->
comp_node
(),
true
);
var
.
node
()
->
init_mem_plan
(
dv
);
var
.
node
()
->
reset_dev_tensor_from_tensor
(
*
dv
);
if
(
in_record
)
{
auto
&&
device_tensor
=
var
.
node
()
->
mutable_dev_tensor
();
device_tensor
.
only_reset_raw_storage
(
dv
->
storage
());
}
else
{
var
.
node
()
->
reset_dev_tensor_from_tensor
(
*
dv
);
}
});
}
if
(
m_user_config
->
options
.
force_output_dynamic_alloc
)
{
...
...
lite/src/mge/tensor_impl.cpp
浏览文件 @
d90cb776
...
...
@@ -314,14 +314,22 @@ void TensorImplDft::reset(void* prepared_data) {
size_t
size
=
mge_layout
.
span
().
dist_byte
();
mgb
::
HostTensorStorage
storage
;
storage
.
reset
(
cn
,
size
,
raw_storage
);
m_host_tensor
->
reset
(
storage
,
mge_layout
);
if
(
m_record_reset
)
{
m_host_tensor
->
only_reset_raw_storage
(
storage
);
}
else
{
m_host_tensor
->
reset
(
storage
,
mge_layout
);
}
}
else
{
auto
cn
=
m_dev_tensor
->
comp_node
();
auto
mge_layout
=
m_dev_tensor
->
layout
();
size_t
size
=
mge_layout
.
span
().
dist_byte
();
mgb
::
DeviceTensorStorage
storage
;
storage
.
reset
(
cn
,
size
,
raw_storage
);
m_dev_tensor
->
reset
(
storage
,
mge_layout
);
if
(
m_record_reset
)
{
m_dev_tensor
->
only_reset_raw_storage
(
storage
);
}
else
{
m_dev_tensor
->
reset
(
storage
,
mge_layout
);
}
}
if
(
m_reset_callback
)
{
m_reset_callback
(
this
);
...
...
@@ -455,14 +463,9 @@ void TensorImplDft::device_share_host_memory() {
m_host_tensor
->
comp_node
(),
m_host_tensor
->
layout
());
}
if
(
m_host_tensor
->
raw_ptr
()
!=
m_dev_tensor
->
raw_ptr
())
{
auto
raw_storage
=
std
::
shared_ptr
<
mgb
::
dt_byte
>
(
m_host_tensor
->
raw_ptr
(),
[](
void
*
)
{});
auto
cn
=
m_host_tensor
->
comp_node
();
auto
mge_layout
=
m_host_tensor
->
layout
();
size_t
size
=
mge_layout
.
span
().
dist_byte
();
mgb
::
DeviceTensorStorage
storage
;
storage
.
reset
(
cn
,
size
,
raw_storage
);
m_dev_tensor
->
reset
(
storage
,
mge_layout
);
auto
&&
storage
=
mgb
::
DeviceTensorStorage
::
make_proxy
(
m_host_tensor
->
storage
());
m_dev_tensor
->
only_reset_raw_storage
(
storage
);
}
}
}
...
...
lite/src/mge/tensor_impl.h
浏览文件 @
d90cb776
...
...
@@ -126,6 +126,7 @@ private:
void
set_mge_tensor_compnode
(
const
mgb
::
CompNode
&
comp_node
);
private:
bool
m_record_reset
=
false
;
std
::
function
<
void
(
TensorImplDft
*
)
>
m_get_memory_callback
;
std
::
function
<
void
(
TensorImplDft
*
)
>
m_reset_callback
;
std
::
shared_ptr
<
mgb
::
HostTensorND
>
m_host_tensor
;
...
...
lite/test/test_network.cpp
浏览文件 @
d90cb776
...
...
@@ -412,9 +412,12 @@ TEST(TestNetWork, ResetOutput) {
compare_lite_tensor
<
float
>
(
output_tensor
,
result_mgb
);
}
TEST
(
TestNetWork
,
OutputNoCopy
)
{
namespace
{
void
test_output_no_copy
(
int
record
)
{
Config
config
;
config
.
options
.
force_output_use_user_specified_memory
=
true
;
config
.
options
.
comp_node_seq_record_level
=
record
;
auto
tensor
=
get_input_data
(
"./input_data.npy"
);
std
::
string
model_path
=
"./shufflenet.mge"
;
std
::
string
input_name
=
"data"
;
...
...
@@ -453,6 +456,65 @@ TEST(TestNetWork, OutputNoCopy) {
}
}
void
test_input_no_copy
(
int
record
)
{
Config
config
;
config
.
options
.
force_output_use_user_specified_memory
=
true
;
config
.
options
.
comp_node_seq_record_level
=
record
;
std
::
string
model_path
=
"./shufflenet.mge"
;
std
::
string
input_name
=
"data"
;
Layout
layout_in
{{
1
,
3
,
224
,
224
},
4
};
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
inputs
;
std
::
vector
<
std
::
shared_ptr
<
Tensor
>>
outputs
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
auto
tmp_in
=
std
::
make_shared
<
Tensor
>
(
LiteDeviceType
::
LITE_CPU
,
layout_in
);
auto
ptr
=
static_cast
<
float
*>
(
tmp_in
->
get_memory_ptr
());
for
(
size_t
id
=
0
;
id
<
2
*
224
*
224
;
id
++
)
{
ptr
[
id
]
=
i
+
1
;
}
inputs
.
push_back
(
tmp_in
);
outputs
.
push_back
(
mgb_lar
(
model_path
,
config
,
input_name
,
tmp_in
));
}
std
::
shared_ptr
<
Network
>
network
=
std
::
make_shared
<
Network
>
(
config
);
network
->
load_model
(
model_path
);
std
::
shared_ptr
<
Tensor
>
input_tensor
=
network
->
get_io_tensor
(
input_name
);
std
::
shared_ptr
<
Tensor
>
output_tensor
=
network
->
get_output_tensor
(
0
);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
auto
ptr
=
inputs
[
i
]
->
get_memory_ptr
();
input_tensor
->
reset
(
ptr
,
layout_in
);
auto
tmp_out
=
std
::
make_shared
<
Tensor
>
(
LiteDeviceType
::
LITE_CPU
,
Layout
{{
1
,
1000
},
2
,
LiteDataType
::
LITE_FLOAT
});
output_tensor
->
reset
(
tmp_out
->
get_memory_ptr
(),
output_tensor
->
get_layout
());
network
->
forward
();
network
->
wait
();
compare_lite_tensor
<
float
>
(
output_tensor
,
outputs
[
i
]);
}
}
}
// namespace
TEST
(
TestNetWork
,
OutputNoCopy
)
{
test_output_no_copy
(
0
);
}
TEST
(
TestNetWork
,
OutputNoCopyRecord
)
{
test_output_no_copy
(
1
);
}
TEST
(
TestNetWork
,
IONoCopy
)
{
test_input_no_copy
(
0
);
}
TEST
(
TestNetWork
,
IONoCopyRecord
)
{
test_input_no_copy
(
1
);
}
TEST
(
TestNetWork
,
OutputDynamicAlloc
)
{
Config
config
;
config
.
options
.
force_output_dynamic_alloc
=
true
;
...
...
src/core/impl/graph/cg_impl_seq.cpp
浏览文件 @
d90cb776
...
...
@@ -250,9 +250,14 @@ std::unique_ptr<CompNodeSeqRecorder> ComputingGraphImpl::ComputingSequence::
"graph."
);
return
{};
}
auto
is_graph_dest_varnode
=
[
&
](
VarNode
*
var
)
{
return
ComputingGraphImpl
::
downcast
(
owner_graph
())
->
var_receiver
(
var
).
size
()
==
0
;
};
for
(
auto
i
:
*
m_opr_seq
)
{
for
(
auto
j
:
i
->
output
())
{
if
(
!
is_static_var_storage
(
j
))
{
if
(
!
is_static_var_storage
(
j
)
&&
!
is_graph_dest_varnode
(
j
)
)
{
mgb_log_error
(
"can not enable CompNodeSeqRecorder because var "
"storage not static: %s"
,
...
...
src/core/impl/graph/var_node_mem_mgr.cpp
浏览文件 @
d90cb776
...
...
@@ -319,7 +319,7 @@ bool VarNodeMemManager::DynamicAllocOprInfo::check_if_mem_status_change() {
for
(
size_t
i
=
0
;
i
<
dev_val_input
.
size
();
i
++
)
{
auto
&&
t
=
prev_dev_val_input
[
i
];
auto
s
=
dev_val_input
[
i
]
->
dev_tensor
().
as_megdnn
();
if
(
t
.
raw_ptr
!=
s
.
raw_ptr
||
!
t
.
layout
.
eq_layout
(
s
.
layout
))
{
if
(
t
.
raw_ptr
()
!=
s
.
raw_ptr
()
||
!
t
.
layout
.
eq_layout
(
s
.
layout
))
{
same
=
false
;
t
=
s
;
}
...
...
src/core/impl/tensor.cpp
浏览文件 @
d90cb776
...
...
@@ -233,6 +233,7 @@ TensorStorage<Trait>& TensorStorage<Trait>::operator=(const TensorStorage& rhs)
m_capacity
=
rhs
.
m_capacity
;
m_offset
=
rhs
.
m_offset
;
m_data
=
rhs
.
m_data
;
m_ref_ptr
=
rhs
.
m_ref_ptr
;
return
*
this
;
}
...
...
@@ -264,7 +265,8 @@ TensorStorage<Trait> TensorStorage<Trait>::sub(ptrdiff_t offset) const {
m_size
-
offset
,
m_capacity
-
offset
,
static_cast
<
size_t
>
(
toff
),
m_data
};
m_data
,
m_ref_ptr
};
}
template
<
class
Trait
>
...
...
@@ -278,8 +280,10 @@ dt_byte* TensorStorage<Trait>::apply_lazy_and_get_ptr() {
mgb_throw_if
(
!
ptr
,
SystemError
,
"failed to allocate memory"
);
CompNode
cn
=
m_comp_node
;
m_data
.
reset
(
ptr
,
[
cn
](
void
*
p
)
{
Trait
::
free
(
cn
,
p
);
});
m_ref_ptr
=
std
::
make_shared
<
void
*>
(
static_cast
<
void
*>
(
nullptr
));
m_capacity
=
m_size
;
}
*
m_ref_ptr
=
static_cast
<
void
*>
(
m_data
.
get
());
return
m_data
.
get
()
+
m_offset
;
}
...
...
@@ -305,6 +309,19 @@ void TensorStorage<Trait>::reset(CompNode node, size_t size, RawStorage data) {
m_capacity
=
size
;
m_offset
=
0
;
m_data
=
std
::
move
(
data
);
m_ref_ptr
=
std
::
make_shared
<
void
*>
(
static_cast
<
void
*>
(
m_data
.
get
()));
}
template
<
class
Trait
>
void
TensorStorage
<
Trait
>::
only_reset_raw_storage
(
CompNode
node
,
size_t
size
,
RawStorage
data
,
size_t
offset
)
{
mgb_assert
(
m_allow_realloc
);
m_comp_node
=
node
;
m_size
=
size
;
m_capacity
=
size
;
m_offset
=
offset
;
m_data
=
std
::
move
(
data
);
*
m_ref_ptr
=
static_cast
<
void
*>
(
m_data
.
get
());
}
template
<
class
Trait
>
...
...
@@ -316,8 +333,8 @@ TensorStorage<Trait> TensorStorage<Trait>::make_proxy(
"proxy source should be on CPU; got %s"
,
src
.
comp_node
().
to_string
().
c_str
());
src
.
ptr
();
return
{
true
,
src
.
m_comp_node
,
src
.
m_size
,
src
.
m_
capacity
,
src
.
m_offset
,
src
.
m_data
};
return
{
true
,
src
.
m_comp_node
,
src
.
m_size
,
src
.
m_capacity
,
src
.
m_
offset
,
src
.
m_data
,
src
.
m_ref_ptr
};
}
template
<
class
Trait
>
...
...
@@ -481,6 +498,17 @@ DEF(reset, &)(TensorStorage storage, const TensorLayout& layout) {
return
static_cast
<
ChainReturnType
&>
(
*
this
);
}
DEF
(
only_reset_raw_storage
,
&
)(
TensorStorage
storage
)
{
//! The storage to be reset is either satisfy the layout or empty.
//! Empty storage is used after weight preprocess for saving memory and
//! checking layout when running
mgb_assert
(
storage
.
valid_span
(
m_layout
.
span
())
||
storage
.
empty
());
m_storage
.
only_reset_raw_storage
(
storage
.
comp_node
(),
storage
.
size
(),
storage
.
raw_storage
(),
storage
.
offset
());
return
static_cast
<
ChainReturnType
&>
(
*
this
);
}
DEF
(
comp_node
,
&
)(
CompNode
comp_node
,
bool
allow_mem_node_change
)
{
auto
orig_cn
=
m_storage
.
comp_node_allow_invalid
();
m_storage
.
comp_node
(
comp_node
,
allow_mem_node_change
);
...
...
src/core/include/megbrain/tensor.h
浏览文件 @
d90cb776
...
...
@@ -225,6 +225,12 @@ public:
*/
MGE_WIN_DECLSPEC_FUC
void
reset
(
CompNode
node
,
size_t
size
,
RawStorage
data
);
/*!
* \brief reset the tensor storage to given memory area
*/
MGE_WIN_DECLSPEC_FUC
void
only_reset_raw_storage
(
CompNode
node
,
size_t
size
,
RawStorage
data
,
size_t
offset
);
/*!
* \brief make a TensorStorage that shares memory with another
* TensorStorage some different storage type
...
...
@@ -270,6 +276,11 @@ public:
return
m_data
;
}
std
::
shared_ptr
<
void
*>
get_ref_ptr
()
const
{
ptr
();
return
m_ref_ptr
;
}
private:
template
<
class
T
>
friend
class
TensorStorage
;
...
...
@@ -289,16 +300,20 @@ private:
RawStorage
m_data
;
std
::
shared_ptr
<
void
*>
m_ref_ptr
=
std
::
make_shared
<
void
*>
((
void
*
)
nullptr
);
//! used internally for returning a predefined TensorStorage
TensorStorage
(
bool
allow_realloc
,
CompNode
comp_node
,
size_t
size
,
size_t
capacity
,
size_t
offset
,
const
RawStorage
&
data
)
size_t
offset
,
const
RawStorage
&
data
,
std
::
shared_ptr
<
void
*>
ref_ptr
=
std
::
make_shared
<
void
*>
((
void
*
)
nullptr
))
:
m_allow_realloc
(
allow_realloc
),
m_comp_node
(
comp_node
),
m_size
(
size
),
m_capacity
(
capacity
),
m_offset
(
offset
),
m_data
(
data
)
{}
m_data
(
data
),
m_ref_ptr
(
ref_ptr
)
{}
void
check_comp_node_valid
()
const
{
if
(
mgb_unlikely
(
!
m_comp_node
.
valid
()))
...
...
@@ -423,6 +438,8 @@ public:
MGE_WIN_DECLSPEC_FUC
ChainReturnType
&
reset
(
TensorStorage
storage
,
const
TensorLayout
&
layout
);
MGE_WIN_DECLSPEC_FUC
ChainReturnType
&
only_reset_raw_storage
(
TensorStorage
storage
);
/* ================= getter and setters ================= */
/*!
...
...
@@ -501,7 +518,8 @@ public:
//! convert to megdnn::TensorND
megdnn
::
TensorND
as_megdnn
()
const
{
return
{
const_cast
<
void
*>
(
static_cast
<
const
void
*>
(
raw_ptr
())),
m_layout
};
megdnn
::
RefPtr
ref_ptr
(
m_storage
.
get_ref_ptr
(),
m_storage
.
offset
(),
false
);
return
{
m_layout
,
ref_ptr
};
}
/* ================= misc ================= */
...
...
src/core/test/comp_node.cpp
浏览文件 @
d90cb776
...
...
@@ -816,4 +816,79 @@ TYPED_TEST(TestCPUCompSeqRec, run_multi_thread_default) {
}
}
// anonymous namespace
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
TEST
(
TestCPUCompSeqRec
,
run_dyn_ptr
)
{
CompNode
cn
=
CompNode
::
load
(
"cpux"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
4
,
1
},
cn
),
host_y0
=
gen
({
4
,
1
},
cn
),
host_z0
=
gen
({
4
,
1
},
cn
);
auto
host_x1
=
gen
({
4
,
1
},
cn
),
host_y1
=
gen
({
4
,
1
},
cn
),
host_z1
=
gen
({
4
,
1
},
cn
);
auto
dev_x0
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
auto
dev_y0
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
auto
dev_z0
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
auto
dev_x1
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
auto
dev_y1
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
auto
dev_z1
=
std
::
make_shared
<
DeviceTensorND
>
(
cn
);
(
*
dev_x0
).
comp_node
(
cn
).
copy_from
(
*
host_x0
).
sync
();
(
*
dev_y0
).
comp_node
(
cn
).
copy_from
(
*
host_y0
).
sync
();
(
*
dev_z0
).
comp_node
(
cn
).
copy_from
(
*
host_z0
).
sync
();
(
*
dev_x1
).
comp_node
(
cn
).
copy_from
(
*
host_x1
).
sync
();
(
*
dev_y1
).
comp_node
(
cn
).
copy_from
(
*
host_y1
).
sync
();
(
*
dev_z1
).
comp_node
(
cn
).
copy_from
(
*
host_z1
).
sync
();
auto
check
=
[
&
]()
{
HostTensorND
ret
(
CompNode
::
load
(
"cpux"
),
host_x0
->
shape
());
auto
px
=
host_x0
->
ptr
<
float
>
(),
py
=
host_y0
->
ptr
<
float
>
(),
pz
=
host_z0
->
ptr
<
float
>
(),
pw
=
ret
.
ptr
<
float
>
();
auto
sz0
=
host_x0
->
shape
()[
0
],
sz1
=
host_x0
->
shape
()[
1
];
for
(
size_t
i
=
0
;
i
<
sz0
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
sz1
;
++
j
)
{
pw
[
i
*
sz1
+
j
]
=
px
[
i
*
sz1
+
j
]
*
py
[
i
*
sz1
+
j
]
+
pz
[
i
*
sz1
+
j
];
}
}
return
ret
;
};
auto
graph
=
ComputingGraph
::
make
();
// test record on first run
graph
->
options
().
var_sanity_check_first_run
=
false
;
graph
->
options
().
graph_opt_level
=
0
;
graph
->
options
().
comp_node_seq_record_level
=
1
;
graph
->
options
().
fake_next_exec
=
true
;
auto
x
=
opr
::
VolatileSharedDeviceTensor
::
make
(
*
graph
,
dev_x0
),
y
=
opr
::
VolatileSharedDeviceTensor
::
make
(
*
graph
,
dev_y0
),
z
=
opr
::
VolatileSharedDeviceTensor
::
make
(
*
graph
,
dev_z0
),
w
=
opr
::
Elemwise
::
make
({
x
,
y
,
z
},
opr
::
Elemwise
::
Mode
::
FUSE_MUL_ADD3
);
HostTensorND
host_w
;
auto
func
=
graph
->
compile
({{
w
,
[
&
host_w
](
DeviceTensorND
&
d
)
{
host_w
=
mgb
::
HostTensorND
::
make_proxy
(
d
);
}}});
func
->
execute
();
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
if
(
i
==
2
)
{
*
host_x0
=
*
host_x1
;
*
host_y0
=
*
host_y1
;
*
host_z0
=
*
host_z1
;
dev_x0
->
only_reset_raw_storage
(
dev_x1
->
storage
());
dev_y0
->
only_reset_raw_storage
(
dev_y1
->
storage
());
dev_z0
->
only_reset_raw_storage
(
dev_z1
->
storage
());
}
func
->
execute
();
auto
expect
=
check
();
MGB_ASSERT_TENSOR_EQ
(
expect
,
host_w
)
<<
"iter "
<<
i
;
}
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/gopt/test/no_memory_copy.cpp
浏览文件 @
d90cb776
...
...
@@ -13,6 +13,7 @@
#include "./network.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/test/helper.h"
using
namespace
mgb
;
...
...
@@ -20,9 +21,11 @@ using namespace mgb;
struct
TestGraph
{
CompNode
m_cn
;
HostTensorGenerator
<>
m_gen
;
HostTensorGenerator
<
dtype
::
Int32
>
m_gen_int
;
std
::
unique_ptr
<
Network
>
m_network
;
SymbolVar
m_out_var
;
std
::
shared_ptr
<
HostTensorND
>
input_tensor
;
std
::
shared_ptr
<
HostTensorND
>
input_tensor2
;
TestGraph
()
{
m_cn
=
CompNode
::
load
(
"cpu0"
);
...
...
@@ -41,6 +44,78 @@ struct TestGraph {
m_out_var
=
m_network
->
add_pooling
(
f
,
{
2
,
2
},
{
2
,
2
});
}
void
create_graph_with_subtensor_forward
()
{
input_tensor
=
m_gen
({
2
,
3
,
32
,
32
},
m_cn
);
auto
input
=
opr
::
Host2DeviceCopy
::
make
(
*
m_network
->
graph
,
input_tensor
,
m_cn
)
.
rename
(
"input"
);
auto
cv
=
[
&
](
int
v
)
{
auto
rst
=
input
.
make_scalar
(
v
);
return
rst
;
};
using
Ad
=
opr
::
Subtensor
::
AxisIndexer
;
auto
sub
=
opr
::
Subtensor
::
make
(
input
,
{
Ad
::
make_interval
(
0
,
cv
(
1
),
cv
(
2
),
None
)});
auto
f
=
m_network
->
add_conv
(
sub
,
4
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
2
,
2
},
{
0
,
0
});
f
=
m_network
->
add_elemwise
(
{
f
},
dtype
::
Float32
(),
opr
::
Elemwise
::
Param
::
Mode
::
EXP
);
f
=
m_network
->
add_conv
(
f
,
8
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
1
,
1
},
{
1
,
1
});
m_out_var
=
m_network
->
add_pooling
(
f
,
{
2
,
2
},
{
2
,
2
});
}
void
create_graph_with_subtensor_relayout
()
{
input_tensor
=
m_gen
({
2
,
3
,
32
,
40
},
m_cn
);
auto
input
=
opr
::
Host2DeviceCopy
::
make
(
*
m_network
->
graph
,
input_tensor
,
m_cn
)
.
rename
(
"input"
);
auto
cv
=
[
&
](
int
v
)
{
auto
rst
=
input
.
make_scalar
(
v
);
return
rst
;
};
using
Ad
=
opr
::
Subtensor
::
AxisIndexer
;
auto
sub
=
opr
::
Subtensor
::
make
(
input
,
{
Ad
::
make_interval
(
0
,
cv
(
1
),
cv
(
2
),
None
),
Ad
::
make_interval
(
3
,
cv
(
0
),
cv
(
32
),
None
)});
auto
f
=
m_network
->
add_conv
(
sub
,
4
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
2
,
2
},
{
0
,
0
});
f
=
m_network
->
add_elemwise
(
{
f
},
dtype
::
Float32
(),
opr
::
Elemwise
::
Param
::
Mode
::
EXP
);
f
=
m_network
->
add_conv
(
f
,
8
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
1
,
1
},
{
1
,
1
});
m_out_var
=
m_network
->
add_pooling
(
f
,
{
2
,
2
},
{
2
,
2
});
}
void
create_graph_with_setsubtensor
()
{
input_tensor
=
m_gen
({
1
,
3
,
32
,
32
},
m_cn
);
input_tensor2
=
m_gen
({
1
,
1
,
32
,
32
},
m_cn
);
auto
input
=
opr
::
Host2DeviceCopy
::
make
(
*
m_network
->
graph
,
input_tensor
,
m_cn
)
.
rename
(
"input"
);
auto
input_sub
=
opr
::
Host2DeviceCopy
::
make
(
*
m_network
->
graph
,
input_tensor2
,
m_cn
)
.
rename
(
"input2"
);
auto
cv
=
[
&
](
int
v
)
{
auto
rst
=
input
.
make_scalar
(
v
);
return
rst
;
};
using
Ad
=
opr
::
Subtensor
::
AxisIndexer
;
input
=
opr
::
SetSubtensor
::
make
(
input
,
input_sub
,
{
Ad
::
make_interval
(
1
,
cv
(
1
),
cv
(
2
),
None
)});
auto
f
=
m_network
->
add_conv
(
input
,
4
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
2
,
2
},
{
0
,
0
});
f
=
m_network
->
add_elemwise
(
{
f
},
dtype
::
Float32
(),
opr
::
Elemwise
::
Param
::
Mode
::
EXP
);
f
=
m_network
->
add_conv
(
f
,
8
,
{
3
,
3
},
dtype
::
Float32
(),
true
,
{
1
,
1
},
{
1
,
1
});
m_out_var
=
m_network
->
add_pooling
(
f
,
{
2
,
2
},
{
2
,
2
});
}
std
::
unique_ptr
<
cg
::
AsyncExecutable
>
compile_without_copy
()
{
return
m_network
->
graph
->
compile
({{
m_out_var
,
nullptr
}});
}
...
...
@@ -51,8 +126,11 @@ struct TestGraph {
}
};
TEST
(
TestNoCopy
,
BasicInputNoCopy
)
{
namespace
{
void
test_basic_input_no_copy
(
bool
record
)
{
auto
test_graph
=
TestGraph
();
auto
compute_graph
=
test_graph
.
m_network
->
graph
;
compute_graph
->
options
().
comp_node_seq_record_level
=
record
;
test_graph
.
create_graph
();
HostTensorND
out
,
out_pre
;
auto
func
=
test_graph
.
compile_with_copy
(
out
);
...
...
@@ -68,7 +146,11 @@ TEST(TestNoCopy, BasicInputNoCopy) {
for
(
size_t
d
=
0
;
d
<
length
;
d
++
)
{
ptr
[
d
]
=
i
;
}
input_tensor
->
reset
(
storage
,
layout
);
if
(
record
)
{
input_tensor
->
only_reset_raw_storage
(
storage
);
}
else
{
input_tensor
->
reset
(
storage
,
layout
);
}
}
func
->
execute
();
func
->
wait
();
...
...
@@ -78,6 +160,11 @@ TEST(TestNoCopy, BasicInputNoCopy) {
out_pre
.
copy_from
(
out
).
sync
();
}
}
}
// namespace
TEST
(
TestNoCopy
,
InputNoCopyPtrEQ
)
{
test_basic_input_no_copy
(
0
);
}
TEST
(
TestNoCopy
,
IONoCopyPtrEQ
)
{
auto
test_graph
=
TestGraph
();
...
...
@@ -158,8 +245,112 @@ TEST(TestNoCopy, IONoCopyCorrect) {
}
}
TEST
(
TestNoCopy
,
InputNoCopyRecord
)
{}
TEST
(
TestNoCopy
,
InputNoCopyRecord
)
{
test_basic_input_no_copy
(
1
);
}
TEST
(
TestNoCopy
,
IONoCopyRecord
)
{
auto
test_graph
=
TestGraph
();
auto
compute_graph
=
test_graph
.
m_network
->
graph
;
compute_graph
->
options
().
force_output_use_user_specified_memory
=
true
;
compute_graph
->
options
().
comp_node_seq_record_level
=
1
;
test_graph
.
create_graph
();
HostTensorND
truth
;
auto
func
=
test_graph
.
compile_without_copy
();
auto
&&
outvar
=
func
->
get_output_vars
()[
0
];
DeviceTensorND
tmp
(
test_graph
.
m_cn
,
{
1
,
8
,
7
,
7
});
outvar
->
init_mem_plan
(
&
tmp
);
size_t
times
=
10
;
for
(
size_t
i
=
0
;
i
<
times
;
i
++
)
{
auto
input_tensor
=
test_graph
.
input_tensor
;
auto
layout
=
input_tensor
->
layout
();
size_t
length
=
layout
.
total_nr_elems
();
auto
storage
=
TensorStorage
<
HostTensorStorageTrait
>
(
test_graph
.
m_cn
);
storage
.
ensure_size
(
length
*
sizeof
(
float
));
float
*
ptr
=
storage
.
ptr
()
->
as
<
float
>
();
for
(
size_t
d
=
0
;
d
<
length
;
d
++
)
{
ptr
[
d
]
=
i
/
5
+
3
;
}
input_tensor
->
only_reset_raw_storage
(
storage
);
DeviceTensorND
dv
(
test_graph
.
m_cn
,
{
1
,
8
,
7
,
7
});
dv
.
raw_ptr
();
auto
&
dev_tensor
=
outvar
->
mutable_dev_tensor
();
dev_tensor
.
only_reset_raw_storage
(
dv
.
storage
());
func
->
execute
();
func
->
wait
();
if
(
i
%
5
==
0
)
{
truth
.
copy_from
(
dv
).
sync
();
continue
;
}
HostTensorND
to_check
;
to_check
.
copy_from
(
dv
).
sync
();
MGB_ASSERT_TENSOR_EQ
(
to_check
,
truth
);
}
}
namespace
{
void
test_subtensor_record
(
int
level
)
{
auto
test_graph
=
TestGraph
();
auto
compute_graph
=
test_graph
.
m_network
->
graph
;
compute_graph
->
options
().
force_output_use_user_specified_memory
=
true
;
compute_graph
->
options
().
comp_node_seq_record_level
=
1
;
if
(
level
==
2
)
{
test_graph
.
create_graph_with_setsubtensor
();
}
else
if
(
level
==
1
)
{
test_graph
.
create_graph_with_subtensor_forward
();
}
else
{
test_graph
.
create_graph_with_subtensor_relayout
();
}
HostTensorND
truth
;
auto
func
=
test_graph
.
compile_without_copy
();
auto
&&
outvar
=
func
->
get_output_vars
()[
0
];
DeviceTensorND
tmp
(
test_graph
.
m_cn
,
{
1
,
8
,
7
,
7
});
outvar
->
init_mem_plan
(
&
tmp
);
size_t
times
=
10
;
for
(
size_t
i
=
0
;
i
<
times
;
i
++
)
{
auto
input_tensor
=
test_graph
.
input_tensor
;
auto
layout
=
input_tensor
->
layout
();
size_t
length
=
layout
.
total_nr_elems
();
auto
storage
=
TensorStorage
<
HostTensorStorageTrait
>
(
test_graph
.
m_cn
);
storage
.
ensure_size
(
length
*
sizeof
(
float
));
float
*
ptr
=
storage
.
ptr
()
->
as
<
float
>
();
for
(
size_t
d
=
0
;
d
<
length
;
d
++
)
{
ptr
[
d
]
=
i
/
5
+
3
;
}
input_tensor
->
only_reset_raw_storage
(
storage
);
DeviceTensorND
dv
(
test_graph
.
m_cn
,
{
1
,
8
,
7
,
7
});
dv
.
raw_ptr
();
auto
&
dev_tensor
=
outvar
->
mutable_dev_tensor
();
dev_tensor
.
only_reset_raw_storage
(
dv
.
storage
());
func
->
execute
();
func
->
wait
();
if
(
i
%
5
==
0
)
{
truth
.
copy_from
(
dv
).
sync
();
continue
;
}
HostTensorND
to_check
;
to_check
.
copy_from
(
dv
).
sync
();
MGB_ASSERT_TENSOR_EQ
(
to_check
,
truth
);
}
}
}
// namespace
TEST
(
TestNoCopy
,
IONoCopyRecordSubTensor
)
{
test_subtensor_record
(
0
);
}
TEST
(
TestNoCopy
,
IONoCopyRecordSubTensorRelayout
)
{
test_subtensor_record
(
1
);
}
TEST
(
TestNoCopy
,
OutputNoCopyRecord
)
{}
//! TODO: the test should fix compnode memory copy, which now not record reference
//! ptr, when support it, the test will pass
/*TEST(TestNoCopy, IONoCopyRecordSetSubTensor) {
test_subtensor_record(2);
}*/
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/jit/impl/nvrtc/compiler_cuda.cpp
浏览文件 @
d90cb776
...
...
@@ -133,7 +133,7 @@ void setup_and_launch(const JITExecutor* fusion_opr, CUfunction func, int block_
host_init_pvisitor
<
out_dim
>
(
pvisitors
[
i
],
args
.
inputs
[
i
].
layout
);
}
datum
[
nr_inps
]
=
reinterpret_cast
<
CUdeviceptr
>
(
args
.
outputs
[
0
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
);
args
.
outputs
[
0
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
);
size_t
num_elements
=
args
.
outputs
[
0
].
layout
.
total_nr_elems
();
mgb_assert
(
num_elements
<=
UINT32_MAX
,
...
...
@@ -152,11 +152,11 @@ void setup_and_launch(const JITExecutor* fusion_opr, CUfunction func, int block_
exec_args
[
0
]
=
datum
.
data
();
exec_args
[
2
]
=
pvisitors
.
data
();
}
else
{
datum_dev
=
args
.
outputs
[
1
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
;
datum_dev
=
args
.
outputs
[
1
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
;
MGB_CUDA_CHECK
(
cudaMemcpyAsync
(
datum_dev
,
datum
.
data
(),
(
nr_inps
+
1
)
*
sizeof
(
CUdeviceptr
),
cudaMemcpyHostToDevice
,
env
.
cuda_env
().
stream
));
p_visitors_dev
=
args
.
outputs
[
2
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
;
p_visitors_dev
=
args
.
outputs
[
2
].
from
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
;
MGB_CUDA_CHECK
(
cudaMemcpyAsync
(
p_visitors_dev
,
pvisitors
.
data
(),
nr_inps
*
sizeof
(
ParamElemVisitor
<
out_dim
>
),
cudaMemcpyHostToDevice
,
...
...
src/opr/impl/basic_arith.cpp
浏览文件 @
d90cb776
...
...
@@ -1269,7 +1269,9 @@ void Reduce::KernScheduler::update_ptr(
mgb_assert
(
dest
.
shape
().
total_nr_elems
()
==
m_kern_param
.
back
().
output
.
layout
.
total_nr_elems
());
m_kern_param
[
0
].
input
.
raw_ptr
=
const_cast
<
dt_byte
*>
(
input
.
raw_ptr
());
auto
in_tensor
=
input
.
as_megdnn
();
in_tensor
.
layout
=
m_kern_param
[
0
].
input
.
layout
;
m_kern_param
[
0
].
input
=
in_tensor
;
dt_byte
*
workspace_begin
=
workspace_size
()
?
const_cast
<
dt_byte
*>
(
workspace
.
raw_ptr
())
...
...
@@ -1280,12 +1282,14 @@ void Reduce::KernScheduler::update_ptr(
*
kern_workspace
=
workspace_begin
+
m_workspace_spec
[
2
].
offset
;
for
(
size_t
i
=
0
;
i
<
m_kern_param
.
size
()
-
1
;
++
i
)
{
auto
optr
=
tmp_reduce_ptr
[
i
%
2
];
m_kern_param
[
i
].
output
.
r
aw_ptr
=
optr
;
m_kern_param
[
i
+
1
].
input
.
r
aw_ptr
=
optr
;
m_kern_param
[
i
].
output
.
r
eset_ptr
(
optr
)
;
m_kern_param
[
i
+
1
].
input
.
r
eset_ptr
(
optr
)
;
}
for
(
auto
&&
i
:
m_kern_param
)
i
.
workspace
.
raw_ptr
=
kern_workspace
;
m_kern_param
.
back
().
output
.
raw_ptr
=
const_cast
<
dt_byte
*>
(
dest
.
raw_ptr
());
auto
out_tensor
=
dest
.
as_megdnn
();
out_tensor
.
layout
=
m_kern_param
.
back
().
output
.
layout
;
m_kern_param
.
back
().
output
=
out_tensor
;
}
void
Reduce
::
KernScheduler
::
execute
(
...
...
@@ -1343,8 +1347,8 @@ void Reduce::KernScheduler::execute(
}
mgb_assert
(
input
.
layout
().
is_contiguous
()
&&
input
.
raw_ptr
()
==
m_kern_param
[
0
].
input
.
raw_ptr
&&
dest
.
raw_ptr
()
==
m_kern_param
.
back
().
output
.
raw_ptr
);
input
.
raw_ptr
()
==
m_kern_param
[
0
].
input
.
raw_ptr
()
&&
dest
.
raw_ptr
()
==
m_kern_param
.
back
().
output
.
raw_ptr
()
);
for
(
auto
&&
i
:
m_kern_param
)
{
opr
->
param
()
=
i
.
KernParam
::
kparam
;
opr
->
exec
(
i
.
input
,
i
.
output
,
i
.
workspace
);
...
...
src/opr/impl/cond.cpp
浏览文件 @
d90cb776
...
...
@@ -1157,7 +1157,7 @@ void CondExecMerge::scn_do_execute() {
if
(
forwarded
[
oidx
])
{
ovar
->
shape_alloc
(
ovar
->
shape
());
auto
&&
own_dest
=
ovar
->
dev_tensor
().
as_megdnn
();
mgb_assert
(
own_dest
.
raw_ptr
!=
dest
.
raw_ptr
);
mgb_assert
(
own_dest
.
raw_ptr
()
!=
dest
.
raw_ptr
()
);
dnn_opr
->
exec
({
dest
,
src
},
own_dest
);
forwarded
[
oidx
]
=
false
;
}
else
{
...
...
src/opr/impl/misc.cpp
浏览文件 @
d90cb776
...
...
@@ -241,9 +241,9 @@ void NvOf::scn_do_execute() {
}
nv_flow_extractor
->
extract_flow
(
static_cast
<
unsigned
char
*>
(
input
(
0
)
->
dev_tensor
().
as_megdnn
().
raw_ptr
),
static_cast
<
unsigned
char
*>
(
input
(
0
)
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
),
vshape
,
reinterpret_cast
<
int16_t
*>
(
output
(
0
)
->
dev_tensor
().
as_megdnn
().
raw_ptr
));
reinterpret_cast
<
int16_t
*>
(
output
(
0
)
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
));
}
void
NvOf
::
init_output_static_infer_desc
()
{
...
...
src/opr/impl/tensor_manip.cpp
浏览文件 @
d90cb776
...
...
@@ -1425,7 +1425,7 @@ void ParamPackConcat::scn_do_execute() {
m_inp_ptr
.
resize
(
inputs
.
size
()
-
1
);
auto
ptr
=
m_inp_ptr
.
data
();
for
(
size_t
i
=
0
;
i
<
inputs
.
size
()
-
1
;
i
++
)
{
ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
;
ptr
[
i
]
=
inputs
[
i
]
->
dev_tensor
().
as_megdnn
().
raw_ptr
()
;
}
auto
offsets
=
inputs
.
back
()
->
dev_tensor
().
as_megdnn
();
megdnn
::
TensorND
srcs
(
...
...
src/opr/test/dnn/convolution.cpp
浏览文件 @
d90cb776
...
...
@@ -2572,8 +2572,8 @@ TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
ASSERT_EQ
(
pf
->
tensors
.
size
(),
2
);
ASSERT_TRUE
(
pf
->
tensors
[
0
].
layout
.
eq_shape
({
1
,
2
,
3
,
4
}));
ASSERT_TRUE
(
pf
->
tensors
[
1
].
layout
.
eq_shape
({
5
,
6
,
7
,
8
}));
ASSERT_NE
(
pf
->
tensors
[
0
].
raw_ptr
,
nullptr
);
ASSERT_NE
(
pf
->
tensors
[
1
].
raw_ptr
,
nullptr
);
ASSERT_NE
(
pf
->
tensors
[
0
].
raw_ptr
()
,
nullptr
);
ASSERT_NE
(
pf
->
tensors
[
1
].
raw_ptr
()
,
nullptr
);
pf
->
tensors
[
0
].
ptr
<
float
>
()[
0
]
=
114.514
f
;
pf
->
tensors
[
1
].
ptr
<
float
>
()[
0
]
=
1926.0817
f
;
}));
...
...
src/plugin/impl/var_sanity_check.cpp
浏览文件 @
d90cb776
...
...
@@ -178,7 +178,7 @@ VarSanityCheck::ChecksumResult VarSanityCheck::calc_checksum(VarNode* var) {
auto
span
=
dt
.
layout
().
span
();
megdnn
::
TensorND
tensor
;
tensor
.
r
aw_ptr
=
dt
.
raw_ptr
()
+
span
.
low_byte
;
tensor
.
r
eset_ptr
(
dt
.
raw_ptr
()
+
span
.
low_byte
)
;
tensor
.
layout
.
init_contiguous_stride
({
span
.
dist_byte
()});
tensor
.
layout
.
dtype
=
dtype
::
Byte
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录