Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
e400b7ff
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
e400b7ff
编写于
2月 09, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(imperative): enable memory forwarding for imperative
GitOrigin-RevId: 7c1993979c051a1b01e168eefaa03a0386ddc7bc
上级
84d1a440
变更
35
展开全部
隐藏空白更改
内联
并排
Showing
35 changed file
with
1136 addition
and
681 deletion
+1136
-681
dnn/include/megdnn/basic_types.h
dnn/include/megdnn/basic_types.h
+2
-1
dnn/src/common/basic_types.cpp
dnn/src/common/basic_types.cpp
+1
-1
dnn/src/common/opr_delegate.cpp
dnn/src/common/opr_delegate.cpp
+2
-1
imperative/python/src/tensor.cpp
imperative/python/src/tensor.cpp
+1
-575
imperative/python/src/tensor.h
imperative/python/src/tensor.h
+2
-0
imperative/python/src/tensor_utils.cpp
imperative/python/src/tensor_utils.cpp
+630
-0
imperative/python/src/tensor_utils.h
imperative/python/src/tensor_utils.h
+11
-0
imperative/src/impl/interpreter/interpreter_impl.cpp
imperative/src/impl/interpreter/interpreter_impl.cpp
+13
-3
imperative/src/impl/mgb_cg_impl.h
imperative/src/impl/mgb_cg_impl.h
+1
-0
imperative/src/impl/op_def.cpp
imperative/src/impl/op_def.cpp
+5
-0
imperative/src/impl/op_trait.cpp
imperative/src/impl/op_trait.cpp
+8
-0
imperative/src/impl/op_trait.h
imperative/src/impl/op_trait.h
+9
-0
imperative/src/impl/opr_utility.cpp
imperative/src/impl/opr_utility.cpp
+2
-2
imperative/src/impl/ops/broadcast.cpp
imperative/src/impl/ops/broadcast.cpp
+39
-27
imperative/src/impl/ops/elemwise.cpp
imperative/src/impl/ops/elemwise.cpp
+15
-5
imperative/src/impl/ops/reduce.cpp
imperative/src/impl/ops/reduce.cpp
+12
-1
imperative/src/impl/ops/rng.cpp
imperative/src/impl/ops/rng.cpp
+8
-0
imperative/src/impl/ops/specializations.cpp
imperative/src/impl/ops/specializations.cpp
+95
-2
imperative/src/impl/ops/utility.cpp
imperative/src/impl/ops/utility.cpp
+1
-1
imperative/src/impl/physical_tensor.cpp
imperative/src/impl/physical_tensor.cpp
+78
-15
imperative/src/impl/proxy_graph.cpp
imperative/src/impl/proxy_graph.cpp
+2
-5
imperative/src/impl/proxy_graph/mini_graph.h
imperative/src/impl/proxy_graph/mini_graph.h
+15
-16
imperative/src/impl/proxy_graph/proxy_graph.cpp
imperative/src/impl/proxy_graph/proxy_graph.cpp
+81
-0
imperative/src/impl/subgraph_detail.cpp
imperative/src/impl/subgraph_detail.cpp
+15
-0
imperative/src/include/megbrain/imperative/op_def.h
imperative/src/include/megbrain/imperative/op_def.h
+3
-0
imperative/src/include/megbrain/imperative/physical_tensor.h
imperative/src/include/megbrain/imperative/physical_tensor.h
+21
-8
imperative/src/include/megbrain/imperative/proxy_graph_detail.h
...tive/src/include/megbrain/imperative/proxy_graph_detail.h
+3
-0
imperative/src/include/megbrain/imperative/subgraph_detail.h
imperative/src/include/megbrain/imperative/subgraph_detail.h
+3
-0
src/core/impl/graph/cg_impl.cpp
src/core/impl/graph/cg_impl.cpp
+1
-1
src/core/impl/graph/cg_impl.h
src/core/impl/graph/cg_impl.h
+4
-3
src/core/impl/graph/var_node.cpp
src/core/impl/graph/var_node.cpp
+30
-1
src/core/impl/tensor.cpp
src/core/impl/tensor.cpp
+1
-1
src/core/include/megbrain/graph/cg.h
src/core/include/megbrain/graph/cg.h
+2
-1
src/core/include/megbrain/graph/var_node.h
src/core/include/megbrain/graph/var_node.h
+7
-0
src/opr/include/megbrain/opr/io.h
src/opr/include/megbrain/opr/io.h
+13
-11
未找到文件。
dnn/include/megdnn/basic_types.h
浏览文件 @
e400b7ff
...
...
@@ -285,7 +285,8 @@ struct TensorLayout : public TensorShape {
* stride
*/
void
add_axis_cont_inplace
(
size_t
axis
)
{
add_axis_inplace
(
axis
,
1
,
stride
[
axis
]
*
shape
[
axis
]);
ptrdiff_t
stride_
=
axis
<
ndim
?
stride
[
axis
]
*
shape
[
axis
]
:
1
;
add_axis_inplace
(
axis
,
1
,
stride_
);
}
/*!
...
...
dnn/src/common/basic_types.cpp
浏览文件 @
e400b7ff
...
...
@@ -382,7 +382,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const {
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
);
auto
ax
=
[](
size_t
shape0
,
size_t
shape1
,
ptrdiff_t
stride0
,
ptrdiff_t
stride1
)
{
return
(
shape0
==
shape1
)
&
((
shape0
=
=
1
)
|
(
stride0
==
stride1
));
return
(
shape0
==
shape1
)
&
((
shape0
<
=
1
)
|
(
stride0
==
stride1
));
};
if
(
ndim
==
rhs
.
ndim
)
{
size_t
eq
=
0
;
...
...
dnn/src/common/opr_delegate.cpp
浏览文件 @
e400b7ff
...
...
@@ -13,7 +13,8 @@
using
namespace
megdnn
;
const
std
::
shared_ptr
<
Handle
>&
megdnn
::
inplace_cpu_handle
(
int
debug_level
)
{
MGE_WIN_DECLSPEC_FUC
const
std
::
shared_ptr
<
Handle
>&
megdnn
::
inplace_cpu_handle
(
int
debug_level
)
{
auto
make
=
[](
int
deb_level
)
{
megcoreDeviceHandle_t
dev_handle
;
megcoreCreateDeviceHandle
(
&
dev_handle
,
megcorePlatformCPU
);
...
...
imperative/python/src/tensor.cpp
浏览文件 @
e400b7ff
此差异已折叠。
点击以展开。
imperative/python/src/tensor.h
浏览文件 @
e400b7ff
...
...
@@ -38,6 +38,8 @@ namespace mgb::imperative::python {
extern
interpreter
::
Interpreter
::
Channel
*
interpreter_for_py
;
extern
PyTypeObject
*
py_tensor_type
;
extern
PyObject
*
cpp_use_symbolic_shape
;
extern
PyObject
*
cpp_astensor1d
;
struct
Tensor
:
NonCopyableObj
{
private:
...
...
imperative/python/src/tensor_utils.cpp
0 → 100644
浏览文件 @
e400b7ff
此差异已折叠。
点击以展开。
imperative/python/src/tensor_utils.h
0 → 100644
浏览文件 @
e400b7ff
#pragma once
namespace
mgb
::
imperative
::
python
{
PyObject
*
make_shape_tuple
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
PyObject
*
getitem_cpp
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
PyObject
*
setitem_cpp
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
}
// namespace mgb::imperative::python
\ No newline at end of file
imperative/src/impl/interpreter/interpreter_impl.cpp
浏览文件 @
e400b7ff
...
...
@@ -642,7 +642,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) {
m_dtr
.
update_used_time
(
dest
);
MGB_RECORD_EVENT
(
TensorProduceEvent
,
dest
->
id
,
ptr
->
layout
(),
ptr
->
comp_node
(),
ptr
->
dev_tensor
().
raw_ptr
());
ptr
->
dev_tensor
(
false
).
raw_ptr
());
// update tensor desc for static infer
if
(
dest
->
desc
.
layout
.
ndim
)
{
mgb_assert
(
...
...
@@ -730,10 +730,20 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
inputs
,
apply_functor
,
const_functor
);
return
outputs
;
}
return
OpDef
::
apply_on_physical_tensor
(
def
,
inputs
,
output_descs
,
validated
);
// Check Input Layout
// Get the input layout constraints, and if the constraint is not satisfied
// inplace update the layout and blob to make the tensor contiguous
auto
&&
constraints
=
OpDef
::
get_input_layout_constraint
(
def
,
inputs
);
for
(
size_t
idx
=
0
;
idx
<
inputs
.
size
();
++
idx
)
{
auto
&&
layout_checker
=
constraints
[
idx
];
if
(
layout_checker
)
{
inputs
[
idx
]
->
to_contiguous_inplace
(
layout_checker
);
}
}
return
OpDef
::
apply_on_physical_tensor
(
def
,
std
::
move
(
inputs
),
output_descs
,
validated
);
};
MGB_RECORD_EVENT
(
OpExecuteEvent
,
apply_id
,
{},
reason
);
// Begin profiling operator
SmallVector
<
std
::
pair
<
CompNode
,
uint64_t
>>
kernels
;
if
(
profiling_device
)
{
// Collecting devices
...
...
imperative/src/impl/mgb_cg_impl.h
浏览文件 @
e400b7ff
#include "../../../src/core/impl/graph/cg_impl.h"
#include "../../../src/core/impl/graph/var_node_mem_mgr.h"
imperative/src/impl/op_def.cpp
浏览文件 @
e400b7ff
...
...
@@ -60,6 +60,11 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> OpDef::infer_output_attrs_falli
return
def
.
trait
()
->
infer_output_attrs_fallible
(
def
,
inputs
);
}
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
OpDef
::
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
return
def
.
trait
()
->
get_input_layout_constraint
(
def
,
inputs
);
}
EncodedSubgraph
OpDef
::
make_backward_graph
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
,
const
SmallVector
<
bool
>&
input_requires_grad
,
...
...
imperative/src/impl/op_trait.cpp
浏览文件 @
e400b7ff
...
...
@@ -47,6 +47,10 @@ void OpMethFallbackByProxyGraph::impl(
InferOutputAttrsFallible
&
func
,
op_meth_tag
::
InferOutputAttrsFallible
)
{
func
.
Base
::
operator
=
(
proxy_graph_detail
::
infer_output_attrs_fallible
);
}
void
OpMethFallbackByProxyGraph
::
impl
(
GetInputLayoutConstraint
&
func
,
op_meth_tag
::
GetInputLayoutConstraint
)
{
func
.
Base
::
operator
=
(
proxy_graph_detail
::
get_input_layout_constraint
);
}
void
OpMethFallbackByProxyGraph
::
impl
(
GradMaker
&
func
,
op_meth_tag
::
GradMaker
)
{
func
.
Base
::
operator
=
(
proxy_graph_detail
::
make_backward_graph
);
}
...
...
@@ -63,6 +67,10 @@ void OpMethFallbackFromSubgraph::impl(
InferOutputAttrsFallible
&
func
,
op_meth_tag
::
InferOutputAttrsFallible
)
{
func
.
Base
::
operator
=
(
subgraph_detail
::
infer_output_attrs_fallible
);
}
void
OpMethFallbackFromSubgraph
::
impl
(
GetInputLayoutConstraint
&
func
,
op_meth_tag
::
GetInputLayoutConstraint
)
{
func
.
Base
::
operator
=
(
subgraph_detail
::
get_input_layout_constraint
);
}
void
OpMethFallbackFromSubgraph
::
impl
(
GradMaker
&
func
,
op_meth_tag
::
GradMaker
)
{
func
.
Base
::
operator
=
(
subgraph_detail
::
make_backward_graph
);
}
...
...
imperative/src/impl/op_trait.h
浏览文件 @
e400b7ff
...
...
@@ -73,6 +73,9 @@ OpMethType(ApplyOnVarNode,
OpMethType
(
InferOutputAttrsFallible
,
decltype
(
OpDef
::
infer_output_attrs_fallible
));
OpMethType
(
GetInputLayoutConstraint
,
decltype
(
OpDef
::
get_input_layout_constraint
));
OpMethType
(
GradMaker
,
decltype
(
OpDef
::
make_backward_graph
));
...
...
@@ -119,6 +122,8 @@ struct OpMethFallbackByProxyGraph : OpMethImplBase {
static
void
impl
(
ApplyOnPhysicalTensor
&
func
,
op_meth_tag
::
ApplyOnPhysicalTensor
);
static
void
impl
(
InferOutputAttrsFallible
&
func
,
op_meth_tag
::
InferOutputAttrsFallible
);
static
void
impl
(
GetInputLayoutConstraint
&
func
,
op_meth_tag
::
GetInputLayoutConstraint
);
static
void
impl
(
GradMaker
&
func
,
op_meth_tag
::
GradMaker
);
};
...
...
@@ -128,6 +133,8 @@ struct OpMethFallbackFromSubgraph : OpMethImplBase {
static
void
impl
(
ApplyOnVarNode
&
func
,
op_meth_tag
::
ApplyOnVarNode
);
static
void
impl
(
InferOutputAttrsFallible
&
func
,
op_meth_tag
::
InferOutputAttrsFallible
);
static
void
impl
(
GetInputLayoutConstraint
&
func
,
op_meth_tag
::
GetInputLayoutConstraint
);
static
void
impl
(
GradMaker
&
func
,
op_meth_tag
::
GradMaker
);
};
...
...
@@ -179,6 +186,7 @@ struct OpTrait {
ApplyOnDeviceTensorND
apply_on_device_tensornd
;
ApplyOnVarNode
apply_on_var_node
;
InferOutputAttrsFallible
infer_output_attrs_fallible
;
GetInputLayoutConstraint
get_input_layout_constraint
;
GradMaker
make_backward_graph
;
Props
props
;
HashFunc
hash
;
...
...
@@ -199,6 +207,7 @@ struct OpTrait {
cb(apply_on_device_tensornd) \
cb(apply_on_var_node) \
cb(infer_output_attrs_fallible) \
cb(get_input_layout_constraint) \
cb(make_backward_graph) \
cb(props) \
cb(hash) \
...
...
imperative/src/impl/opr_utility.cpp
浏览文件 @
e400b7ff
...
...
@@ -117,7 +117,7 @@ void InputCallback::scn_do_execute() {
layout
.
init_contiguous_stride
();
dev_tensor
.
reset
(
dev_tensor
.
storage
(),
layout
);
}
output
(
0
)
->
reset
_dev_tensor_from_tensor
(
dev_tensor
);
output
(
0
)
->
force_assign
_dev_tensor_from_tensor
(
dev_tensor
);
}
cg
::
OperatorNodeBase
*
InputCallback
::
shallow_copy
(
...
...
@@ -311,7 +311,7 @@ cg::OperatorNodeBase::NodeProp* MutableTensor::do_make_node_prop() const {
}
void
MutableTensor
::
scn_do_execute
()
{
output
(
0
)
->
reset
_dev_tensor_from_tensor
(
*
m_dev_tensor
);
output
(
0
)
->
force_assign
_dev_tensor_from_tensor
(
*
m_dev_tensor
);
}
void
MutableTensor
::
init_output_static_infer_desc
()
{
...
...
imperative/src/impl/ops/broadcast.cpp
浏览文件 @
e400b7ff
...
...
@@ -83,28 +83,18 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&
input
=
inputs
[
0
];
TensorShape
target_shape
;
if
(
validated
)
{
target_shape
=
output_descs
[
0
].
layout
;
}
else
{
cg
::
copy_tensor_value_to_shape
(
target_shape
,
inputs
[
1
]
->
get_value
().
proxy_to_default_cpu
());
}
TensorPtr
output
=
Tensor
::
make
(
TensorLayout
(
target_shape
,
input
->
dtype
()),
input
->
comp_node
());
if
(
output
->
layout
().
is_empty
())
{
return
{
output
};
}
if
(
input
->
shape
().
eq_shape
(
output
->
shape
()))
{
mgb_assert
(
input
->
layout
().
eq_layout
(
output
->
layout
()));
output
->
dev_tensor
().
copy_from_fixlayout
(
input
->
dev_tensor
());
}
else
{
TensorLayout
input_layout
=
input
->
layout
().
broadcast
(
output
->
shape
());
output
->
dev_tensor
().
copy_from_fixlayout
(
input
->
dev_tensor
().
sub
(
SubTensorSpec
::
make_from_layout
(
input_layout
)));
}
return
{
output
};
def
.
cast_final_safe
<
Broadcast
>
();
size_t
nr_inp
=
inputs
.
size
();
mgb_assert
(
nr_inp
==
2
,
"Broadcast expects 2 inputs; got %lu actually"
,
nr_inp
);
auto
&&
src
=
inputs
[
0
];
auto
&&
tshp_nd
=
inputs
[
1
];
auto
slayout
=
src
->
layout
();
TensorShape
tshp
;
cg
::
copy_tensor_value_to_shape
(
tshp
,
tshp_nd
->
get_value
().
proxy_to_default_cpu
());
TensorLayout
tlayout
=
slayout
.
broadcast
(
tshp
);
// memory forward
return
{
Tensor
::
make
(
src
->
blob
(),
src
->
offset
(),
tlayout
)};
}
OP_TRAIT_REG
(
Broadcast
,
Broadcast
,
opr
::
Broadcast
)
...
...
@@ -184,10 +174,6 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
auto
&&
tshp_nd
=
inputs
[
1
];
auto
slayout
=
src
->
layout
();
if
(
validated
)
{
return
{
Tensor
::
make
(
src
->
blob
(),
0
,
output_descs
[
0
].
layout
)};
}
TensorShape
tshp
;
cg
::
copy_tensor_value_to_shape
(
tshp
,
tshp_nd
->
get_value
().
proxy_to_default_cpu
());
if
(
op_def
.
axis
!=
opr
::
Reshape
::
Param
::
INVALID_AXIS
)
{
...
...
@@ -195,13 +181,39 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
tshp
[
op_def
.
axis
]
=
1
;
tshp
[
op_def
.
axis
]
=
src
->
layout
().
total_nr_elems
()
/
tshp
.
total_nr_elems
();
}
return
{
Tensor
::
make
(
src
->
blob
(),
0
,
slayout
.
reshape
(
tshp
))};
TensorLayout
tlayout
;
mgb_assert
(
slayout
.
try_reshape
(
tlayout
,
tshp
));
return
{
Tensor
::
make
(
src
->
blob
(),
src
->
offset
(),
tlayout
)};
}
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
Reshape
>
();
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
layout_checker
(
inputs
.
size
());
layout_checker
[
0
]
=
[
&
](
const
TensorLayout
&
layout
)
{
TensorShape
tshp
;
TensorLayout
ret
;
cg
::
copy_tensor_value_to_shape
(
tshp
,
inputs
[
1
]
->
get_value
().
proxy_to_default_cpu
());
if
(
op_def
.
axis
!=
opr
::
Reshape
::
Param
::
INVALID_AXIS
)
{
mgb_assert
(
tshp
[
op_def
.
axis
]
==
-
1
);
tshp
[
op_def
.
axis
]
=
1
;
tshp
[
op_def
.
axis
]
=
layout
.
total_nr_elems
()
/
tshp
.
total_nr_elems
();
}
if
(
layout
.
try_reshape
(
ret
,
tshp
))
{
return
true
;
}
else
{
return
false
;
}
};
return
layout_checker
;
}
OP_TRAIT_REG
(
Reshape
,
Reshape
)
.
apply_on_var_node
(
apply_on_var_node
)
.
infer_output_attrs_fallible
(
infer_output_attrs_fallible
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
get_input_layout_constraint
(
get_input_layout_constraint
)
.
fallback
();
}
// namespace reshape
...
...
imperative/src/impl/ops/elemwise.cpp
浏览文件 @
e400b7ff
...
...
@@ -220,12 +220,22 @@ cg::OperatorNodeBase* apply_inplace_add_on_var_node(
SmallVector
<
TensorPtr
>
apply_inplace_add_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
mgb_assert
(
inputs
[
0
]
->
blob
().
use_count
()
==
1
&&
inputs
[
0
]
->
blob
()
->
storage
().
unique
(),
"This inplace modification may change the elements of other tensors. "
"Please set MEGENGINE_INPLACE_UPDATE to 0 to ensure the program runs "
"correctly."
);
auto
dest
=
inputs
[
0
],
delta
=
inputs
[
1
],
alpha
=
inputs
[
2
],
beta
=
inputs
[
3
];
if
(
!
(
inputs
[
0
]
->
blob
().
unique
()
&&
inputs
[
0
]
->
blob
()
->
storage
().
unique
()))
{
mgb_log_warn
(
"This inplace modification may change the elements of other tensors. "
"Fallback to non-inplace update."
);
DeviceTensorStorage
storage
;
storage
.
reset
(
dest
->
comp_node
(),
dest
->
blob
()
->
size
(),
dest
->
blob
()
->
storage
());
storage
=
storage
.
sub
(
dest
->
offset
());
DeviceTensorND
dv
;
dv
.
reset
(
storage
,
dest
->
layout
());
DeviceTensorND
dv_new
;
dv_new
.
copy_from
(
dv
);
dest
=
Tensor
::
make
(
dv_new
);
}
auto
tensor_to_scalar
=
[](
const
TensorPtr
&
tensor
)
->
float
{
return
*
tensor
->
get_value
().
ptr
<
float
>
();
};
...
...
imperative/src/impl/ops/reduce.cpp
浏览文件 @
e400b7ff
...
...
@@ -54,7 +54,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
if
(
memory_forward_success
(
def
,
inputs
))
{
return
{
Tensor
::
make
(
inputs
[
0
]
->
blob
(),
0
,
inputs
[
0
]
->
layout
())};
return
{
Tensor
::
make
(
inputs
[
0
]
->
blob
(),
inputs
[
0
]
->
offset
(),
inputs
[
0
]
->
layout
())};
}
return
proxy_graph_detail
::
apply_on_physical_tensor
(
def
,
inputs
,
output_descs
,
validated
);
...
...
@@ -73,11 +74,21 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
return
{
output_descs
,
validated
};
}
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
layout_checker
(
inputs
.
size
());
layout_checker
[
0
]
=
[](
const
TensorLayout
&
layout
)
{
return
layout
.
is_contiguous
();
};
return
layout_checker
;
}
OP_TRAIT_REG
(
Reduce
,
Reduce
,
opr
::
Reduce
)
.
make_from_op_node
(
make_from_op_node
)
.
apply_on_var_node
(
apply_on_var_node
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
infer_output_attrs_fallible
(
infer_output_attrs_fallible
)
.
get_input_layout_constraint
(
get_input_layout_constraint
)
.
fallback
();
}
// namespace reduce
}
// namespace
...
...
imperative/src/impl/ops/rng.cpp
浏览文件 @
e400b7ff
...
...
@@ -594,6 +594,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<Dro
return
{
dests
,
true
};
}
template
<
typename
Op
>
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
layout_checker
(
inputs
.
size
());
return
layout_checker
;
}
}
// anonymous namespace
Handle
new_handle
(
CompNode
comp_node
,
uint64_t
seed
)
{
...
...
@@ -622,6 +629,7 @@ CompNode get_rng_handle_compnode(Handle handle) {
.apply_on_var_node(apply_on_var_node<NAME, Output>) \
.apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \
.infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \
.get_input_layout_constraint(get_input_layout_constraint<NAME>) \
.fallback(); \
}
...
...
imperative/src/impl/ops/specializations.cpp
浏览文件 @
e400b7ff
...
...
@@ -60,9 +60,55 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
return
opr
::
Dimshuffle
::
make
(
inputs
[
0
],
ds
.
pattern
,
0UL
,
config
);
}
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&&
ds
=
static_cast
<
const
Dimshuffle
&>
(
def
);
mgb_assert
(
ds
.
pattern
.
size
()
<=
TensorShape
::
MAX_NDIM
,
"Dimshuffle pattern exceeds max length of %zd"
,
TensorShape
::
MAX_NDIM
);
size_t
nr_inp
=
inputs
.
size
();
mgb_assert
(
nr_inp
==
1
,
"Dimshuffle expects 1 inputs; got %lu actually"
,
nr_inp
);
auto
&&
src
=
inputs
[
0
];
auto
inp_layout
=
src
->
layout
();
size_t
pattern_ndim
=
*
std
::
max_element
(
ds
.
pattern
.
begin
(),
ds
.
pattern
.
end
())
+
1
;
mgb_assert
(
inp_layout
.
ndim
==
pattern_ndim
,
"input ndim mismatch for Dimshuffle: expect=%zd actual=%zd"
,
pattern_ndim
,
inp_layout
.
ndim
);
TensorLayout
out_layout
{
inp_layout
.
dtype
};
out_layout
.
ndim
=
ds
.
pattern
.
size
();
size_t
idx
=
0
;
bool
input_used
[
TensorLayout
::
MAX_NDIM
]
=
{
0
};
for
(
auto
i
:
ds
.
pattern
)
{
if
(
i
<
0
)
{
out_layout
.
shape
[
idx
]
=
1
;
out_layout
.
stride
[
idx
]
=
1
;
}
else
{
input_used
[
i
]
=
true
;
out_layout
.
shape
[
idx
]
=
inp_layout
.
shape
[
i
];
out_layout
.
stride
[
idx
]
=
inp_layout
.
stride
[
i
];
}
++
idx
;
}
if
(
out_layout
.
is_contiguous
())
{
out_layout
.
init_contiguous_stride
();
}
for
(
size_t
i
=
0
;
i
<
pattern_ndim
;
++
i
)
{
mgb_assert
(
input_used
[
i
]
||
inp_layout
.
shape
[
i
]
==
1
,
"non-1 dim discarded in Dimshuffle: ishp=%s dim=%zd"
,
inp_layout
.
megdnn
::
TensorShape
::
to_string
().
c_str
(),
i
);
}
// memory forward
return
{
Tensor
::
make
(
src
->
blob
(),
src
->
offset
(),
out_layout
)};
}
OP_TRAIT_REG
(
Dimshuffle
,
Dimshuffle
,
opr
::
Dimshuffle
)
.
make_from_op_node
(
make_from_op_node
)
.
apply_on_var_node
(
apply_on_var_node
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
fallback
();
}
// namespace dimshuffle
}
// namespace
...
...
@@ -80,7 +126,25 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
return
opr
::
AxisAddRemove
::
make
(
inputs
[
0
],
param
,
config
);
}
OP_TRAIT_REG
(
AddAxis
,
AddAxis
).
apply_on_var_node
(
apply_on_var_node
).
fallback
();
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
AddAxis
>
();
size_t
nr_inp
=
inputs
.
size
();
mgb_assert
(
nr_inp
==
1
,
"AddAxis expects 1 inputs; got %lu actually"
,
nr_inp
);
auto
&&
src
=
inputs
[
0
];
auto
tlayout
=
src
->
layout
();
for
(
auto
&&
i
:
op_def
.
axis
)
{
tlayout
.
add_axis_cont_inplace
(
i
);
}
// memory forward
return
{
Tensor
::
make
(
src
->
blob
(),
src
->
offset
(),
tlayout
)};
}
OP_TRAIT_REG
(
AddAxis
,
AddAxis
)
.
apply_on_var_node
(
apply_on_var_node
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
fallback
();
}
// namespace add_axis
}
// namespace
...
...
@@ -97,7 +161,36 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
return
opr
::
AxisAddRemove
::
make
(
inputs
[
0
],
param
,
config
);
}
OP_TRAIT_REG
(
RemoveAxis
,
RemoveAxis
).
apply_on_var_node
(
apply_on_var_node
).
fallback
();
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
RemoveAxis
>
();
size_t
nr_inp
=
inputs
.
size
();
mgb_assert
(
nr_inp
==
1
,
"RemoveAxis expects 1 inputs; got %lu actually"
,
nr_inp
);
auto
&&
src
=
inputs
[
0
];
auto
tlayout
=
src
->
layout
();
for
(
auto
&&
i
:
op_def
.
axis
)
{
if
(
tlayout
.
ndim
==
1
)
{
mgb_assert
(
tlayout
.
shape
[
0
]
==
1
&&
i
==
0
,
"can not remove axis %u from tensor of shape=%s"
,
i
,
tlayout
.
megdnn
::
TensorShape
::
to_string
().
c_str
());
}
else
{
mgb_assert
(
i
<
tlayout
.
ndim
&&
tlayout
.
shape
[
i
]
==
1
,
"can not remove axis %u from tensor of shape=%s"
,
i
,
tlayout
.
megdnn
::
TensorShape
::
to_string
().
c_str
());
tlayout
.
remove_axis_inplace
(
i
);
}
}
// memory forward
return
{
Tensor
::
make
(
src
->
blob
(),
src
->
offset
(),
tlayout
)};
}
OP_TRAIT_REG
(
RemoveAxis
,
RemoveAxis
)
.
apply_on_var_node
(
apply_on_var_node
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
fallback
();
}
// namespace remove_axis
}
// namespace
...
...
imperative/src/impl/ops/utility.cpp
浏览文件 @
e400b7ff
...
...
@@ -411,7 +411,7 @@ struct ComputingGraphHolder {
executable
->
wait
();
size_t
nr_inputs
=
inputs
.
size
();
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
auto
input_dev_tensor
=
input_tensors
[
i
]
->
dev_tensor
();
auto
input_dev_tensor
=
input_tensors
[
i
]
->
dev_tensor
(
false
);
inputs
[
i
].
device_value
->
reset
(
input_dev_tensor
.
storage
(),
input_dev_tensor
.
layout
());
if
(
inputs
[
i
].
host_value
)
{
...
...
imperative/src/impl/physical_tensor.cpp
浏览文件 @
e400b7ff
...
...
@@ -95,7 +95,13 @@ const Blob::RawStorage& Blob::storage() {
Tensor
::
Tensor
(
BlobPtr
blob
,
const
TensorLayout
&
layout
,
size_t
offset
,
const
HostTensorND
&
hv
)
:
m_layout
(
layout
),
m_blob
(
std
::
move
(
blob
)),
m_offset
(
offset
),
m_value
(
hv
)
{}
:
m_cn
(
blob
->
comp_node
()),
m_shape
(
layout
),
m_dtype
(
layout
.
dtype
),
m_layout
(
layout
),
m_blob
(
std
::
move
(
blob
)),
m_offset
(
offset
),
m_value
(
hv
)
{}
Tensor
::
Tensor
(
const
HostTensorND
&
hv
)
:
Tensor
(
hv
.
layout
(),
hv
.
comp_node
())
{
constexpr
int
size_threshold
=
TensorShape
::
MAX_NDIM
;
...
...
@@ -107,7 +113,12 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
MGB_RECORD_EVENT
(
profiler
::
HostToDeviceEvent
,
hv
.
layout
(),
hv
.
comp_node
(),
hv
.
raw_ptr
(),
dev_tensor
().
raw_ptr
());
dev_tensor
().
copy_from_fixlayout
(
hv
);
DeviceTensorStorage
storage
;
storage
.
reset
(
m_cn
,
m_blob
->
size
(),
m_blob
->
storage
());
storage
=
storage
.
sub
(
m_offset
);
DeviceTensorND
dv
;
dv
.
reset
(
storage
,
m_layout
);
dv
.
copy_from_fixlayout
(
hv
);
// even though hv is saved in m_value, Tensor itself could be
// released before copy completes
MGB_RECORD_EVENT
(
...
...
@@ -117,25 +128,36 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) {
}
}
Tensor
::
Tensor
(
const
DeviceTensorND
&
dv
,
const
HostTensorND
&
hv
)
{
Tensor
::
Tensor
(
const
DeviceTensorND
&
dv
,
const
HostTensorND
&
hv
)
:
m_offset
(
dv
.
storage
().
offset
()),
m_cn
(
dv
.
comp_node
()),
m_shape
(
dv
.
layout
()),
m_dtype
(
dv
.
layout
().
dtype
),
m_blob
(
Blob
::
make
(
dv
.
storage
())),
m_layout
(
dv
.
layout
())
{
if
(
!
hv
.
empty
())
{
mgb_assert
(
dv
.
comp_node
()
==
hv
.
comp_node
());
mgb_assert
(
dv
.
dtype
()
==
hv
.
dtype
());
mgb_assert
(
dv
.
shape
().
eq_shape
(
hv
.
shape
()));
m_value
=
hv
;
}
m_layout
=
dv
.
layout
();
m_blob
=
Blob
::
make
(
dv
.
storage
());
m_offset
=
dv
.
storage
().
offset
();
}
Tensor
::
Tensor
(
const
TensorLayout
&
layout
,
const
CompNode
&
cn
)
:
m_layout
{
layout
},
m_blob
{
Blob
::
make
(
cn
,
layout
.
span
().
dist_byte
())},
m_offset
{
0
}
{}
m_offset
{
0
},
m_cn
(
cn
),
m_shape
(
layout
),
m_dtype
(
layout
.
dtype
)
{}
Tensor
::
Tensor
(
const
BlobPtr
blob
,
const
size_t
offset
,
const
TensorLayout
&
layout
)
:
m_layout
{
layout
},
m_blob
{
blob
},
m_offset
{
offset
}
{}
:
m_layout
{
layout
},
m_blob
{
blob
},
m_offset
{
offset
},
m_cn
(
blob
->
comp_node
()),
m_shape
(
layout
),
m_dtype
(
layout
.
dtype
)
{}
TensorPtr
Tensor
::
make
(
const
HostTensorND
&
hv
)
{
auto
&&
blob
=
MultiCNConstTensorCache
::
inst
().
lookup
(
hv
);
...
...
@@ -145,10 +167,45 @@ TensorPtr Tensor::make(const HostTensorND& hv) {
return
std
::
make_shared
<
Tensor
>
(
hv
);
}
DeviceTensorND
Tensor
::
dev_tensor
()
{
void
Tensor
::
to_contiguous_inplace
(
VarNode
::
LayoutConstraintCallback
&
layout_checker
)
{
MGB_LOCK_GUARD
(
m_blob_mtx
);
if
(
!
m_layout
.
is_empty
()
&&
!
layout_checker
(
m_layout
))
{
DeviceTensorStorage
storage
;
storage
.
reset
(
m_cn
,
m_blob
->
size
(),
m_blob
->
storage
());
storage
=
storage
.
sub
(
m_offset
);
DeviceTensorND
dv
;
dv
.
reset
(
storage
,
m_layout
);
DeviceTensorND
dv_contig
;
dv_contig
.
copy_from
(
dv
);
m_layout
=
dv_contig
.
layout
();
std
::
atomic_store
(
&
m_blob
,
Blob
::
make
(
dv_contig
.
storage
()));
mgb_assert
(
m_layout
.
is_contiguous
());
m_offset
=
0
;
}
}
void
Tensor
::
to_contiguous_inplace
()
{
static
VarNode
::
LayoutConstraintCallback
default_cb
=
[](
const
TensorLayout
&
layout
)
{
return
layout
.
is_contiguous
();
};
to_contiguous_inplace
(
default_cb
);
}
void
Tensor
::
assign_from_dev_tensor
(
DeviceTensorND
dv
)
{
MGB_LOCK_GUARD
(
m_blob_mtx
);
std
::
atomic_store
(
&
m_blob
,
Blob
::
make
(
dv
.
storage
()));
m_offset
=
dv
.
storage
().
offset
();
m_layout
=
dv
.
layout
();
}
DeviceTensorND
Tensor
::
dev_tensor
(
bool
contiguous
)
{
mgb_assert
(
m_blob
,
"uninitialized tensor."
);
if
(
contiguous
)
{
to_contiguous_inplace
();
}
MGB_LOCK_GUARD
(
m_blob_mtx
);
DeviceTensorStorage
storage
;
storage
.
reset
(
m_
blob
->
comp_node
()
,
m_blob
->
size
(),
m_blob
->
storage
());
storage
.
reset
(
m_
cn
,
m_blob
->
size
(),
m_blob
->
storage
());
storage
=
storage
.
sub
(
m_offset
);
DeviceTensorND
ret
;
ret
.
reset
(
storage
,
m_layout
);
...
...
@@ -156,16 +213,22 @@ DeviceTensorND Tensor::dev_tensor() {
}
void
Tensor
::
fetch_value
()
{
MGB_LOCK_GUARD
(
m_mtx
);
MGB_LOCK_GUARD
(
m_blob_mtx
);
MGB_LOCK_GUARD
(
m_value_mtx
);
if
(
m_value
.
empty
())
{
m_value
.
copy_from
(
dev_tensor
());
DeviceTensorStorage
storage
;
storage
.
reset
(
m_cn
,
m_blob
->
size
(),
m_blob
->
storage
());
storage
=
storage
.
sub
(
m_offset
);
DeviceTensorND
dv
;
dv
.
reset
(
storage
,
m_layout
);
m_value
.
copy_from
(
dv
);
m_value_ready
.
reset
(
EventPool
::
without_timer
().
alloc
(
comp_node
()));
m_value_ready
->
record
();
}
}
bool
Tensor
::
value_fetched
()
{
MGB_LOCK_GUARD
(
m_mtx
);
MGB_LOCK_GUARD
(
m_
value_
mtx
);
return
m_value
.
layout
().
ndim
!=
0
;
}
...
...
@@ -178,7 +241,7 @@ const HostTensorND& Tensor::get_value() {
}
const
HostTensorND
*
Tensor
::
try_get_value
()
{
MGB_LOCK_GUARD
(
m_mtx
);
MGB_LOCK_GUARD
(
m_
value_
mtx
);
if
(
!
m_value
.
empty
()
&&
(
!
m_value_ready
||
m_value_ready
->
finished
()))
{
return
&
m_value
;
}
...
...
@@ -193,7 +256,7 @@ TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) {
}
TensorPtr
Tensor
::
sub
(
size_t
offset
,
TensorShape
shape
)
{
TensorLayout
layout
(
shape
,
m_
layout
.
dtype
);
TensorLayout
layout
(
shape
,
m_dtype
);
return
Tensor
::
make
(
m_blob
,
offset
+
m_offset
,
layout
);
}
...
...
imperative/src/impl/proxy_graph.cpp
浏览文件 @
e400b7ff
...
...
@@ -73,7 +73,7 @@ public:
static
SymbolVar
make
(
ComputingGraph
&
graph
,
Tensor
&
tensor
)
{
auto
opr
=
graph
.
insert_opr
(
std
::
make_unique
<
InputPlaceholder
>
(
graph
,
&
tensor
));
auto
var
=
opr
->
output
(
0
);
auto
&&
dev_tensor
=
tensor
.
dev_tensor
();
auto
&&
dev_tensor
=
tensor
.
dev_tensor
(
false
);
var
->
m_comp_node
=
dev_tensor
.
comp_node
();
var
->
m_shape
=
dev_tensor
.
shape
();
if
(
dev_tensor
.
empty
())
{
...
...
@@ -81,10 +81,7 @@ public:
layout
.
init_contiguous_stride
();
dev_tensor
.
reset
(
dev_tensor
.
storage
(),
layout
);
}
var
->
m_dev_tensor
=
dev_tensor
;
var
->
m_mem_plan
.
reset_from_owner_var
()
.
chunk
()
.
mem_alloc_status
.
set_from_owner_var
();
var
->
force_assign_dev_tensor_from_tensor
(
dev_tensor
);
return
var
;
}
...
...
imperative/src/impl/proxy_graph/mini_graph.h
浏览文件 @
e400b7ff
...
...
@@ -314,15 +314,11 @@ public:
size_t
idx
=
0
;
for
(
auto
&&
input
:
opr_inputs
)
{
mgb_assert
(
input
->
owner_opr
()
->
same_type
<
InputPlaceholder
>
());
input
->
m_dev_tensor
.
storage
({});
auto
&&
dev_tensor
=
inputs
[
input_remap
[
idx
]]
->
dev_tensor
();
auto
&&
dev_tensor
=
inputs
[
input_remap
[
idx
]]
->
dev_tensor
(
false
);
auto
&&
layout
=
dev_tensor
.
layout
();
input
->
shape
(
dev_tensor
.
shape
());
auto
&&
chk
=
input
->
m_mem_plan
.
reset_from_owner_var
().
chunk
();
input
->
m_dev_tensor
.
reset
(
dev_tensor
.
storage
(),
layout
);
input
->
m_mem_plan
.
layout
(
layout
);
chk
.
mem_alloc_status
.
set_from_owner_var
();
input
->
force_assign_dev_tensor_from_tensor
(
dev_tensor
);
mgb_assert
(
input
->
comp_node
()
==
dev_tensor
.
comp_node
());
mgb_assert
(
input
->
shape
().
eq_shape
(
layout
));
...
...
@@ -335,9 +331,14 @@ public:
mgb_assert
(
m_opr
->
usable_output
().
size
()
==
outputs
.
size
());
::
mgb
::
opr
::
intl
::
WorkspaceLimitHook
::
set_impl
(
m_opr
->
owner_graph
(),
get_workspace_limit
);
size_t
j
=
0
;
for
(
auto
&&
var
:
m_opr
->
output
())
{
auto
&&
chk
=
var
->
m_mem_plan
.
reset_from_owner_var
().
chunk
();
chk
.
mem_alloc_status
.
set_from_owner_var
();
}
m_opr
->
mem_plan_fwd_in2out_readonly
();
size_t
j
=
0
;
for
(
auto
&&
var
:
m_opr
->
output
())
{
if
(
var
->
contain_flag
(
VarNode
::
Flag
::
VOLATILE_CONTENT
))
{
TensorLayout
layout
{
var
->
shape
(),
var
->
dtype
(),
var
->
format
()};
var
->
m_dev_tensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
...
...
@@ -349,18 +350,16 @@ public:
mgb_assert
(
var
->
comp_node
()
==
tensor
->
comp_node
());
mgb_assert
(
var
->
shape
().
eq_shape
(
layout
));
mgb_assert
(
var
->
dtype
()
==
layout
.
dtype
);
var
->
assign_dev_tensor_from_tensor
(
tensor
->
dev_tensor
());
if
(
var
->
m_mem_plan
.
chunk
().
owner_var
!=
var
)
{
tensor
->
assign_from_dev_tensor
(
var
->
m_dev_tensor
);
// memory forwarding
}
else
{
var
->
assign_dev_tensor_from_tensor
(
tensor
->
dev_tensor
());
}
++
j
;
}
chk
.
mem_alloc_status
.
set_from_owner_var
();
}
mgb_assert
(
j
==
outputs
.
size
());
// Memory forwarding was bypassed in megbrain with graph option
// imperative_proxy_graph on, here we call mem_plan_fwd_in2out_readonly
// to initialize some opr(e.g. Subtensor)'s internal state
// TODO: implement memory forwarding
m_opr
->
mem_plan_fwd_in2out_readonly
();
{
// some opr (e.g. Reduce) rely on on_mem_status_changed to set
// input/output tensor correctly, since we bypass var_node_mem_mgr
...
...
@@ -840,7 +839,7 @@ public:
Tensor
::
make
(
output_descs
[
i
].
layout
,
output_descs
[
i
].
comp_node
);
}
auto
raw_outputs
=
to_raw_ptr_array
(
outputs
);
auto
raw_outputs
=
to_raw_ptr_array
(
outputs
,
false
);
CompNode
::
UnorderedSet
used_cns
;
for
(
auto
&&
out
:
raw_outputs
)
{
auto
cn
=
out
->
comp_node
();
...
...
imperative/src/impl/proxy_graph/proxy_graph.cpp
浏览文件 @
e400b7ff
...
...
@@ -9,8 +9,12 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "../mgb_cg_impl.h"
#include "./mini_graph.h"
#include "megbrain/opr/io.h"
using
LayoutConstraintLevel
=
mgb
::
cg
::
VarNodeMemManager
::
LayoutConstraintLevel
;
using
LayoutConstraintCallback
=
mgb
::
VarNode
::
LayoutConstraintCallback
;
namespace
mgb
::
imperative
::
proxy_graph
{
MGB_DYN_TYPE_OBJ_FINAL_IMPL
(
ProxyGraph
::
InputPlaceholder
);
...
...
@@ -34,4 +38,81 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
return
ret
;
}
// Cache of per-input layout-constraint callbacks, keyed by a hash of the op
// definition and the comp nodes of its inputs (see
// get_input_layout_constraint below).
// NOTE(review): access to this map is not synchronized in this function --
// confirm that all callers run on a single thread.
std::unordered_map<size_t, SmallVector<LayoutConstraintCallback>>
        input_layout_constraints_cache;

/*!
 * \brief query the layout constraint that an operator puts on each input
 *
 * The operator described by \p def is instantiated once on a global computing
 * graph, with SharedDeviceTensor placeholders standing in for \p inputs. Its
 * add_input_layout_constraint() is then called, and the constraint recorded
 * for each placeholder var is translated into a callback.
 *
 * \param def operator definition
 * \param inputs input tensors; only their comp node and layout are read
 * \return one callback per input. The entry is left default-constructed when
 *      the recorded level is NONE and no custom checker was registered.
 */
SmallVector<LayoutConstraintCallback> get_input_layout_constraint(
        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
    // Hash of (def, comp node of every input). A SmallVector replaces the
    // former variable-length array `size_t data[1 + inputs.size()]`, which is
    // a compiler extension rather than standard C++.
    auto get_input_layout_constraint_hash_key =
            [](const OpDef& def, const SmallVector<TensorPtr>& inputs) {
                XXHash state;
                SmallVector<size_t> data;
                data.reserve(1 + inputs.size());
                data.push_back(def.hash());
                for (auto&& inp : inputs) {
                    data.push_back(mgb::hash(inp->comp_node()));
                }
                state.update(data.data(), data.size() * sizeof(size_t));
                return state.digest();
            };
    auto hash_key = get_input_layout_constraint_hash_key(def, inputs);
    auto&& iter = input_layout_constraints_cache.find(hash_key);
    if (iter != input_layout_constraints_cache.end()) {
        return iter->second;
    }

    // the graph is created once and shared by every later call
    static cg::ComputingGraphImpl* graph =
            imperative::ResourceManager::create_global<cg::ComputingGraphImpl>();

    // one placeholder var per input, carrying the input's comp node and a
    // contiguous version of its layout
    VarNodeArray vinputs(inputs.size());
    for (size_t i = 0; i < inputs.size(); ++i) {
        OperatorNodeConfig config;
        auto&& layout = inputs[i]->layout();
        layout.init_contiguous_stride();
        vinputs[i] = graph->insert_opr(
                                  std::make_unique<mgb::opr::SharedDeviceTensor>(
                                          *graph,
                                          std::make_shared<DeviceTensorND>(
                                                  inputs[i]->comp_node(), layout),
                                          false, config))
                             ->output(0);
    }
    auto&& opr = OpDef::apply_on_var_node(def, vinputs)[0]->owner_opr();
    opr->add_input_layout_constraint();

    SmallVector<LayoutConstraintCallback> res(inputs.size());
    auto& mem_mgr = graph->var_node_mem_manager();
    for (size_t i = 0; i < vinputs.size(); ++i) {
        auto& trait = mem_mgr.get_var_node_mem_trait(vinputs[i]);
        // NOTE(review): the callbacks below capture `trait` by reference and
        // are stored in the global cache; this presumably relies on the trait
        // living as long as the static graph -- confirm.
        switch (trait.layout_constraint.level) {
            case LayoutConstraintLevel::CONTIG:
                res[i] = [](const TensorLayout& layout) {
                    return layout.is_contiguous();
                };
                break;
            case LayoutConstraintLevel::MONOTONE:
                res[i] = [&trait](const TensorLayout& layout) {
                    if (!layout.is_abs_monotonous_allow_brdcst()) {
                        return false;
                    }
                    for (auto&& checker : trait.layout_constraint.custom)
                        if (!checker(layout))
                            return false;
                    return true;
                };
                break;
            case LayoutConstraintLevel::NONE:
                // without custom checkers the callback stays
                // default-constructed
                if (!trait.layout_constraint.custom.empty()) {
                    res[i] = [&trait](const TensorLayout& layout) {
                        for (auto&& checker : trait.layout_constraint.custom)
                            if (!checker(layout))
                                return false;
                        return true;
                    };
                }
                break;
            default:
                mgb_throw(InternalError, "invalid layout_constraint_level");
        }
    }
    input_layout_constraints_cache.emplace(hash_key, res);
    return res;
}
}
// namespace mgb::imperative::proxy_graph_detail
imperative/src/impl/subgraph_detail.cpp
浏览文件 @
e400b7ff
...
...
@@ -17,6 +17,8 @@
#include "./op_trait.h"
using
LayoutConstraintCallback
=
mgb
::
VarNode
::
LayoutConstraintCallback
;
namespace
mgb
{
namespace
imperative
{
namespace
subgraph_detail
{
...
...
@@ -73,6 +75,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
const
std
::
shared_ptr
<
OpDef
>&
op
,
const
SmallVector
<
TensorPtr
>&
inputs
,
size_t
nr_outputs
)
{
auto
&&
constraints
=
OpDef
::
get_input_layout_constraint
(
*
op
,
inputs
);
for
(
size_t
idx
=
0
;
idx
<
inputs
.
size
();
++
idx
)
{
auto
&&
layout_checker
=
constraints
[
idx
];
if
(
layout_checker
)
{
inputs
[
idx
]
->
to_contiguous_inplace
(
layout_checker
);
}
}
// do not use inferred output_desc in subgraph
return
OpDef
::
apply_on_physical_tensor
(
*
op
,
inputs
,
output_descs
,
false
);
};
...
...
@@ -81,6 +90,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
return
outputs
;
}
/*!
 * \brief layout constraints for the inputs of a subgraph op
 *
 * \param def operator definition (not read here)
 * \param inputs input tensors; only their count is used
 * \return a vector holding one default-constructed callback per input
 */
SmallVector<LayoutConstraintCallback> get_input_layout_constraint(
        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
    return SmallVector<LayoutConstraintCallback>(inputs.size());
}
static
EncodedSubgraph
make_backward_graph_from_forward
(
const
SmallVector
<
LogicalTensorDesc
>&
inputs
,
const
SmallVector
<
bool
>&
input_requires_grad
,
...
...
imperative/src/include/megbrain/imperative/op_def.h
浏览文件 @
e400b7ff
...
...
@@ -78,6 +78,9 @@ public:
static
EncodedSubgraph
make_forward_graph
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
);
static
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
);
const
OpTrait
*
trait
()
const
;
std
::
string
to_string
()
const
;
...
...
imperative/src/include/megbrain/imperative/physical_tensor.h
浏览文件 @
e400b7ff
...
...
@@ -14,6 +14,7 @@
#include <memory>
#include <mutex>
#include "megbrain/graph.h"
#include "megbrain/imperative/resource_manager.h"
#include "megbrain/tensor.h"
...
...
@@ -90,18 +91,24 @@ public:
CompNode
comp_node
()
const
{
mgb_assert
(
m_blob
,
"uninitialized tensor."
);
return
m_
blob
->
comp_node
()
;
return
m_
cn
;
}
DType
dtype
()
const
{
return
m_
layout
.
dtype
;
}
DType
dtype
()
const
{
return
m_dtype
;
}
TensorLayout
layout
()
const
{
return
m_layout
;
}
const
TensorShape
&
shape
()
const
{
return
m_
layout
;
}
const
TensorShape
&
shape
()
const
{
return
m_
shape
;
}
size_t
offset
()
const
{
return
m_offset
;
}
DeviceTensorND
dev_tensor
();
void
to_contiguous_inplace
(
VarNode
::
LayoutConstraintCallback
&
);
void
to_contiguous_inplace
();
DeviceTensorND
dev_tensor
(
bool
contiguous
=
true
);
void
assign_from_dev_tensor
(
DeviceTensorND
);
static
TensorPtr
make_scalar
(
DTypeScalar
value
,
CompNode
cn
);
...
...
@@ -110,7 +117,7 @@ public:
return
make_scalar
(
value
,
m_blob
->
comp_node
());
}
BlobPtr
&
blob
()
{
return
m_blob
;
}
BlobPtr
blob
()
{
return
m_blob
;
}
void
fetch_value
();
bool
value_fetched
();
...
...
@@ -131,10 +138,16 @@ public:
static
void
static_initialize
();
private:
TensorLayout
m_layout
;
BlobPtr
m_blob
;
size_t
m_offset
;
std
::
mutex
m_mtx
;
const
CompNode
m_cn
;
const
TensorShape
m_shape
;
const
DType
m_dtype
;
std
::
mutex
m_blob_mtx
;
BlobPtr
m_blob
;
TensorLayout
m_layout
;
std
::
mutex
m_value_mtx
;
HostTensorND
m_value
;
EventPtr
m_value_ready
=
nullptr
;
};
...
...
imperative/src/include/megbrain/imperative/proxy_graph_detail.h
浏览文件 @
e400b7ff
...
...
@@ -33,6 +33,9 @@ EncodedSubgraph make_backward_graph(
const
SmallVector
<
bool
>&
input_requires_grad
,
const
SmallVector
<
bool
>&
output_has_grad
);
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
);
}
// namespace proxy_graph_detail
}
// namespace imperative
}
// namespace mgb
...
...
imperative/src/include/megbrain/imperative/subgraph_detail.h
浏览文件 @
e400b7ff
...
...
@@ -36,6 +36,9 @@ EncodedSubgraph make_backward_graph(
const
SmallVector
<
bool
>&
input_requires_grad
,
const
SmallVector
<
bool
>&
output_has_grad
);
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
);
}
// namespace subgraph_detail
}
// namespace imperative
}
// namespace mgb
\ No newline at end of file
src/core/impl/graph/cg_impl.cpp
浏览文件 @
e400b7ff
...
...
@@ -322,7 +322,7 @@ void ComputingGraphImpl::free_varnode_storage(void* ptr) {
m_var_node_pool
.
free_raw
(
ptr
);
};
OperatorNodeBase
*
ComputingGraphImpl
::
insert_opr
(
MGE_WIN_DECLSPEC_FUC
OperatorNodeBase
*
ComputingGraphImpl
::
insert_opr
(
std
::
unique_ptr
<
OperatorNodeBase
>
opr_uniqp
)
{
auto
opr
=
opr_uniqp
.
get
();
...
...
src/core/impl/graph/cg_impl.h
浏览文件 @
e400b7ff
...
...
@@ -148,8 +148,8 @@ class ComputingGraphImpl final : public ComputingGraph {
public:
class
ComputingSequence
;
ComputingGraphImpl
();
~
ComputingGraphImpl
();
MGE_WIN_DECLSPEC_FUC
ComputingGraphImpl
();
MGE_WIN_DECLSPEC_FUC
~
ComputingGraphImpl
();
template
<
typename
T
>
static
ComputingGraphImpl
*
downcast
(
T
*
ptr
)
=
delete
;
...
...
@@ -166,7 +166,8 @@ public:
SmallVector
<
std
::
unique_ptr
<
AsyncExecutable
>>
compile_multi_part
(
const
SmallVector
<
OutputSpec
>&
out_specs
)
override
;
OperatorNodeBase
*
insert_opr
(
std
::
unique_ptr
<
OperatorNodeBase
>
opr
)
override
;
MGE_WIN_DECLSPEC_FUC
OperatorNodeBase
*
insert_opr
(
std
::
unique_ptr
<
OperatorNodeBase
>
opr
)
override
;
void
*
alloc_varnode_storage
()
override
;
...
...
src/core/impl/graph/var_node.cpp
浏览文件 @
e400b7ff
...
...
@@ -93,6 +93,23 @@ MemAllocPlan& MemAllocPlan::assign_for_forward(
return
*
this
;
}
/*!
 * \brief make this plan share the chunk of \p src, viewed through \p sub
 *
 * Both plans must be valid and the current layout must have the same shape as
 * the layout of \p sub.
 *
 * \param src plan whose chunk is taken over; its reference count is increased
 * \param sub sub-tensor spec that supplies the new layout
 * \return *this
 */
MemAllocPlan& MemAllocPlan::force_assign_for_forward(
        const MemAllocPlan& src, const SubTensorSpec& sub) {
    mgb_assert(valid() && src.valid() && m_layout.eq_shape(sub.layout()));

    // point at the chunk of src and register one more user of it
    m_chunk = src.m_chunk;
    ++m_chunk->m_refcnt;

    m_layout = sub.layout();

    // make layout strong-contig: walk the axes from last to first and rewrite
    // the stride of every axis whose extent is 1
    const int nr_axis = static_cast<int>(m_layout.ndim);
    for (int axis = nr_axis - 1; axis >= 0; --axis) {
        if (m_layout.shape[axis] != 1) {
            continue;
        }
        if (axis + 1 < nr_axis) {
            m_layout.stride[axis] =
                    m_layout.stride[axis + 1] * m_layout.shape[axis + 1];
        } else {
            m_layout.stride[axis] = 1;
        }
    }

    m_layout.dtype = dtype();
    return *this;
}
MemAllocPlan
&
MemAllocPlan
::
reset_from_owner_var
()
{
auto
owner_var
=
m_chunk_storage
.
owner_var
;
m_layout
.
dtype
=
dtype
();
...
...
@@ -223,7 +240,12 @@ VarNode& VarNode::format(TensorFormat format) {
bool
VarNode
::
set_fwd_in2out_readonly
(
VarNode
*
input
,
const
SubTensorSpec
&
sub
)
{
if
(
owner_graph
()
->
options
().
imperative_proxy_graph
)
{
return
false
;
if
(
input
->
comp_node
()
!=
comp_node
())
{
return
false
;
}
m_mem_plan
.
force_assign_for_forward
(
input
->
m_mem_plan
,
sub
);
m_dev_tensor
=
input
->
dev_tensor
().
sub
(
sub
);
return
true
;
}
return
ComputingGraphImpl
::
downcast
(
owner_graph
())
->
var_node_mem_manager
()
...
...
@@ -361,6 +383,13 @@ VarNode& VarNode::reset_dev_tensor_from_tensor(const DeviceTensorND& value) {
return
*
this
;
}
/*!
 * \brief take over \p value as the device tensor of this var
 *
 * Stores \p value, updates the var shape from it, resets the mem plan from
 * the owner var, marks the chunk's alloc status as coming from the owner var
 * and finally copies the layout of \p value into the mem plan.
 */
void VarNode::force_assign_dev_tensor_from_tensor(const DeviceTensorND& value) {
    m_dev_tensor = value;
    shape(value.shape());

    auto&& chunk = m_mem_plan.reset_from_owner_var().chunk();
    chunk.mem_alloc_status.set_from_owner_var();

    // the plan layout is set last, after the reset above
    m_mem_plan.layout(value.layout());
}
void
VarNode
::
assign_dev_tensor_from_tensor
(
const
DeviceTensorND
&
value
)
{
mgb_assert
(
(
value
.
layout
().
is_contiguous
()
||
value
.
empty
())
&&
...
...
src/core/impl/tensor.cpp
浏览文件 @
e400b7ff
...
...
@@ -475,7 +475,7 @@ DEF(CompNode node, const TensorShape& shape, DType dtype, TensorFormat format)
DEF
(
CompNode
node
,
const
TensorLayout
&
layout
)
:
TensorND
(
node
,
layout
,
layout
.
dtype
,
layout
.
format
)
{
mgb_assert
(
layout
.
is_contiguous
(),
layout
.
is_contiguous
()
||
layout
.
is_empty
()
,
"non-contiguous layout used for initializing a tensor: %s"
,
layout
.
to_string
().
c_str
());
}
...
...
src/core/include/megbrain/graph/cg.h
浏览文件 @
e400b7ff
...
...
@@ -241,7 +241,8 @@ public:
* \return the node in the graph (maybe another node due to
* deduplication)
*/
virtual
OperatorNodeBase
*
insert_opr
(
std
::
unique_ptr
<
OperatorNodeBase
>
opr
)
=
0
;
MGE_WIN_DECLSPEC_FUC
virtual
OperatorNodeBase
*
insert_opr
(
std
::
unique_ptr
<
OperatorNodeBase
>
opr
)
=
0
;
/*!
* \brief used by OperatorNodeBase to allocate its outputs
...
...
src/core/include/megbrain/graph/var_node.h
浏览文件 @
e400b7ff
...
...
@@ -194,6 +194,10 @@ public:
MGE_WIN_DECLSPEC_FUC
MemAllocPlan
&
assign_for_forward
(
const
MemAllocPlan
&
src
,
const
SubTensorSpec
&
sub
);
//! force assign for readonly forward
MGE_WIN_DECLSPEC_FUC
MemAllocPlan
&
force_assign_for_forward
(
const
MemAllocPlan
&
src
,
const
SubTensorSpec
&
sub
);
/*!
* \brief next readonly-forward reader of this MemAllocPlan
*
...
...
@@ -509,6 +513,9 @@ public:
//! NO_SYS_MEM_ALLOC can be modified.
MGE_WIN_DECLSPEC_FUC
bool
is_graph_dest_varnode
();
MGE_WIN_DECLSPEC_FUC
void
force_assign_dev_tensor_from_tensor
(
const
DeviceTensorND
&
value
);
private:
//! whether its memory should be allocated by mgb system during graph
//! execution; initialized in VarNodeMemManager::reset_opr_seq()
...
...
src/opr/include/megbrain/opr/io.h
浏览文件 @
e400b7ff
...
...
@@ -24,7 +24,7 @@ namespace intl {
* \brief base class for IO nodes between device and host
*/
class
HostIONodeBase
:
public
cg
::
SingleCNOperatorNodeBase
{
void
init_output_static_infer_desc
()
override
final
;
MGE_WIN_DECLSPEC_FUC
void
init_output_static_infer_desc
()
override
final
;
protected:
using
cg
::
SingleCNOperatorNodeBase
::
SingleCNOperatorNodeBase
;
...
...
@@ -32,9 +32,10 @@ protected:
/*!
* \brief src_type for static shape and value infer
*/
virtual
cg
::
static_infer
::
SourceType
static_infer_src_type
()
const
;
MGE_WIN_DECLSPEC_FUC
virtual
cg
::
static_infer
::
SourceType
static_infer_src_type
()
const
;
virtual
const
TensorShape
&
get_output_shape
()
=
0
;
MGE_WIN_DECLSPEC_FUC
virtual
const
TensorShape
&
get_output_shape
()
=
0
;
/*!
* \brief fill value in *dest* for static inference
...
...
@@ -52,10 +53,10 @@ protected:
class
DeviceTensorHolder
:
public
HostIONodeBase
{
class
DevValueExecDep
;
void
init_output_format
()
override
;
void
init_output_mem_plan
(
bool
dynamic
)
override
final
;
void
scn_do_execute
()
override
final
;
void
record_execute_deps
(
ExecDependencyArray
&
deps
)
override
;
MGE_WIN_DECLSPEC_FUC
void
init_output_format
()
override
;
MGE_WIN_DECLSPEC_FUC
void
init_output_mem_plan
(
bool
dynamic
)
override
final
;
MGE_WIN_DECLSPEC_FUC
void
scn_do_execute
()
override
final
;
MGE_WIN_DECLSPEC_FUC
void
record_execute_deps
(
ExecDependencyArray
&
deps
)
override
;
protected:
using
HostIONodeBase
::
HostIONodeBase
;
...
...
@@ -77,20 +78,20 @@ MGB_DEFINE_CLS_WITH_SUPER(SharedDeviceTensorBase, DeviceTensorHolder) // {
std
::
shared_ptr
<
DeviceTensorND
>
m_dev_data
;
bool
m_const_value
;
const
TensorShape
&
get_output_shape
()
override
;
MGE_WIN_DECLSPEC_FUC
const
TensorShape
&
get_output_shape
()
override
;
bool
fill_in_static_infer
(
DeviceTensorND
*
dest
)
override
{
MGB_MARK_USED_VAR
(
dest
);
return
false
;
}
void
init_output_comp_node
()
override
;
MGE_WIN_DECLSPEC_FUC
void
init_output_comp_node
()
override
;
public:
//! const_value marks whether the device value of this operator should
//! be treated as constant during graph execution. Should be false in
//! most cases.
SharedDeviceTensorBase
(
MGE_WIN_DECLSPEC_FUC
SharedDeviceTensorBase
(
ComputingGraph
&
graph
,
const
std
::
shared_ptr
<
DeviceTensorND
>&
dev_data
,
bool
const_value
,
const
OperatorNodeConfig
&
config
);
...
...
@@ -248,7 +249,8 @@ private:
*/
MGB_DEFINE_OPR_CLASS_WITH_EXPORT
(
SharedDeviceTensor
,
intl
::
SharedDeviceTensorBase
)
// {
cg
::
static_infer
::
SourceType
static_infer_src_type
()
const
override
;
MGE_WIN_DECLSPEC_FUC
cg
::
static_infer
::
SourceType
static_infer_src_type
()
const
override
;
public
:
using
Super
::
Super
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录