Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
d19fc2c1
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
d19fc2c1
编写于
6月 06, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(imperative): add alloc TensorPtr in imperative
GitOrigin-RevId: 1b438fc436cc752f5437b2bd047006f0e0b1b2e5
上级
d1b6c040
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
177 addition
and
212 deletion
+177
-212
imperative/src/impl/dnn_op_helper.h
imperative/src/impl/dnn_op_helper.h
+10
-4
imperative/src/impl/ops/adaptive_pooling.cpp
imperative/src/impl/ops/adaptive_pooling.cpp
+4
-9
imperative/src/impl/ops/batch_norm.cpp
imperative/src/impl/ops/batch_norm.cpp
+25
-35
imperative/src/impl/ops/cond_take.cpp
imperative/src/impl/ops/cond_take.cpp
+2
-3
imperative/src/impl/ops/convolution.cpp
imperative/src/impl/ops/convolution.cpp
+21
-37
imperative/src/impl/ops/elemwise.cpp
imperative/src/impl/ops/elemwise.cpp
+7
-6
imperative/src/impl/ops/indexing.cpp
imperative/src/impl/ops/indexing.cpp
+6
-8
imperative/src/impl/ops/lamb.cpp
imperative/src/impl/ops/lamb.cpp
+10
-14
imperative/src/impl/ops/layer_norm.cpp
imperative/src/impl/ops/layer_norm.cpp
+11
-18
imperative/src/impl/ops/matmul.cpp
imperative/src/impl/ops/matmul.cpp
+26
-34
imperative/src/impl/ops/misc.cpp
imperative/src/impl/ops/misc.cpp
+2
-3
imperative/src/impl/ops/pooling.cpp
imperative/src/impl/ops/pooling.cpp
+4
-9
imperative/src/impl/ops/reduce.cpp
imperative/src/impl/ops/reduce.cpp
+18
-23
imperative/src/impl/ops/tensor_manip.cpp
imperative/src/impl/ops/tensor_manip.cpp
+2
-3
imperative/src/impl/ops/vision.cpp
imperative/src/impl/ops/vision.cpp
+2
-2
imperative/src/impl/physical_tensor.cpp
imperative/src/impl/physical_tensor.cpp
+5
-1
imperative/src/include/megbrain/imperative/physical_tensor.h
imperative/src/include/megbrain/imperative/physical_tensor.h
+19
-0
src/opr/impl/basic_arith.cpp
src/opr/impl/basic_arith.cpp
+2
-2
src/opr/include/megbrain/opr/basic_arith.h
src/opr/include/megbrain/opr/basic_arith.h
+1
-1
未找到文件。
imperative/src/impl/dnn_op_helper.h
浏览文件 @
d19fc2c1
...
...
@@ -27,10 +27,16 @@ struct DnnOprCaller {
return
mgb
::
opr
::
intl
::
create_megdnn_opr
<
Opr
>
(
cn
);
}
megdnn
::
Workspace
create_workspace
(
TensorLayout
layout
)
{
dev_tensor
=
Tensor
::
make
(
layout
,
cn
)
->
dev_tensor
();
workspace
=
megdnn
::
Workspace
(
dev_tensor
.
raw_ptr
(),
dev_tensor
.
storage
().
size
());
Workspace
create_workspace
(
size_t
sz
)
{
if
(
workspace
.
raw_ptr
)
{
mgb_throw
(
MegBrainError
,
"workspace should not be applicated many times"
);
}
if
(
sz
)
{
TensorLayout
layout
({
sz
},
dtype
::
Byte
());
dev_tensor
=
Tensor
::
make
(
layout
,
cn
)
->
dev_tensor
();
workspace
=
megdnn
::
Workspace
(
dev_tensor
.
raw_ptr
(),
dev_tensor
.
storage
().
size
());
}
return
workspace
;
}
...
...
imperative/src/impl/ops/adaptive_pooling.cpp
浏览文件 @
d19fc2c1
...
...
@@ -135,21 +135,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
param
.
window_w
=
IW
-
(
OW
-
1
)
*
param
.
stride_w
;
TensorND
src
=
inputs
[
0
]
->
dnn_tensor
();
DeviceTensorND
dst
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
auto
dst
=
Tensor
::
make
(
dst_layout
,
cn
);
size_t
sz
=
setup_algo
<
megdnn
::
Pooling
>
(
{
src_layout
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
::
megdnn
::
param
::
ExecutionPolicy
{},
false
);
megdnn
::
Workspace
dnn_wk
;
if
(
sz
)
{
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
}
dnn_opr
.
op
->
exec
(
src
,
dst
.
as_megdnn
(),
dnn_wk
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
src
,
dst
->
dnn_tensor
(),
dnn_wk
);
return
{
Tensor
::
make
(
dst
)
};
return
{
dst
};
}
OP_TRAIT_REG
(
AdaptivePooling
,
AdaptivePooling
)
...
...
imperative/src/impl/ops/batch_norm.cpp
浏览文件 @
d19fc2c1
...
...
@@ -160,10 +160,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
bool
empty_input
=
src_layout
.
is_empty
();
size_t
nr_inp
=
inputs
.
size
();
DeviceTensorND
reserve
;
size_t
sz
=
0
,
rsz
=
0
;
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
TensorLayout
r_layout
({
rsz
},
dtype
::
Byte
());
if
(
!
empty_input
)
{
...
...
@@ -172,79 +170,71 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
src_layout
,
src_layout
,
src_layout
);
rsz
=
dnn_opr
.
op
->
get_reserve_in_bytes
(
src_layout
);
w_layout
=
TensorLayout
({
sz
},
dtype
::
Byte
());
r_layout
=
TensorLayout
({
rsz
},
dtype
::
Byte
());
}
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
reserve
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
r_layout
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
auto
reserve
=
Tensor
::
make
(
r_layout
,
comp_node
);
// alloc memory
DeviceTensorND
y
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
src_layout
);
auto
y
=
Tensor
::
make
(
src_layout
,
comp_node
);
DeviceTensorND
save_mean
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
scale_layout
);
DeviceTensorND
save_variance
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
scale_layout
);
auto
save_mean
=
Tensor
::
make
(
scale_layout
,
comp_node
);
auto
save_variance
=
Tensor
::
make
(
scale_layout
,
comp_node
);
if
(
op_def
.
fwd_mode
==
::
megdnn
::
param
::
BN
::
FwdMode
::
INFERENCE
)
{
if
(
!
empty_input
)
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
inp_tensornds
[
2
],
inp_tensornds
[
3
],
inp_tensornds
[
4
],
save_mean
.
as_megdnn
(),
save_variance
.
as_megdnn
(),
reserve
.
as_megdnn
(),
y
.
as_megdnn
(),
inp_tensornds
[
3
],
inp_tensornds
[
4
],
save_mean
->
dnn_tensor
(),
save_variance
->
dnn_tensor
(),
reserve
->
dnn_tensor
(),
y
->
dnn_tensor
(),
dnn_wk
);
return
{
inputs
[
3
],
inputs
[
4
],
Tensor
::
make
(
reserve
),
Tensor
::
make
(
y
)
};
return
{
inputs
[
3
],
inputs
[
4
],
reserve
,
y
};
}
else
{
DeviceTensorND
mean
,
variance
;
if
(
nr_inp
==
5
)
{
mean
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
scale_layout
);
variance
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
scale_layout
);
auto
mean
=
Tensor
::
make
(
scale_layout
,
comp_node
);
auto
variance
=
Tensor
::
make
(
scale_layout
,
comp_node
);
megdnn
::
RefPtr
src_ptr1
(
inp_tensornds
[
3
].
get_ref_ptr
().
get_ptr
(),
inputs
[
3
]
->
offset
());
megdnn
::
RefPtr
dst_ptr1
(
mean
.
storage
().
get_ref_ptr
(),
mean
.
storage
().
offset
(),
false
);
mean
->
dev_tensor
().
storage
().
get_ref_ptr
(),
mean
->
dev_tensor
().
storage
().
offset
(),
false
);
comp_node
.
peer_copy_to_ref
(
comp_node
,
dst_ptr1
,
src_ptr1
,
scale_layout
.
span
().
high_byte
);
megdnn
::
RefPtr
src_ptr2
(
inp_tensornds
[
4
].
get_ref_ptr
().
get_ptr
(),
inputs
[
4
]
->
offset
());
megdnn
::
RefPtr
dst_ptr2
(
variance
.
storage
().
get_ref_ptr
(),
variance
.
storage
().
offset
(),
false
);
variance
->
dev_tensor
().
storage
().
get_ref_ptr
(),
variance
->
dev_tensor
().
storage
().
offset
(),
false
);
comp_node
.
peer_copy_to_ref
(
comp_node
,
dst_ptr2
,
src_ptr2
,
scale_layout
.
span
().
high_byte
);
if
(
!
empty_input
)
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
inp_tensornds
[
2
],
mean
.
as_megdnn
(),
variance
.
as_megdnn
(),
save_mean
.
as_megdnn
(),
save_
variance
.
as_megdnn
(),
reserve
.
as_megdnn
(),
y
.
as_megdnn
(),
dnn_wk
);
mean
->
dnn_tensor
(),
variance
->
dnn_tensor
(),
save_
mean
->
dnn_tensor
(),
save_variance
->
dnn_tensor
(),
reserve
->
dnn_tensor
(),
y
->
dnn_tensor
(),
dnn_wk
);
return
{
Tensor
::
make
(
mean
),
Tensor
::
make
(
variance
),
Tensor
::
make
(
save_mean
),
Tensor
::
make
(
save_variance
),
Tensor
::
make
(
reserve
),
Tensor
::
make
(
y
)};
return
{
mean
,
variance
,
save_mean
,
save_variance
,
reserve
,
y
};
}
TensorLayout
m_layout
({
0
},
scale_layout
.
dtype
);
mean
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
m_layout
);
variance
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
m_layout
);
auto
mean
=
Tensor
::
make
(
m_layout
,
comp_node
);
auto
variance
=
Tensor
::
make
(
m_layout
,
comp_node
);
if
(
!
empty_input
)
{
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
inp_tensornds
[
2
],
mean
.
as_megdnn
(),
variance
.
as_megdnn
(),
save_mean
.
as_megdnn
(),
save_variance
.
as_megdnn
(),
reserve
.
as_megdnn
(),
y
.
as_megdnn
(),
mean
->
dnn_tensor
(),
variance
->
dnn_tensor
(),
save_mean
->
dnn_tensor
(),
save_variance
->
dnn_tensor
(),
reserve
->
dnn_tensor
(),
y
->
dnn_tensor
(),
dnn_wk
);
}
return
{
Tensor
::
make
(
save_mean
),
Tensor
::
make
(
save_variance
),
Tensor
::
make
(
reserve
),
Tensor
::
make
(
y
)};
return
{
save_mean
,
save_variance
,
reserve
,
y
};
}
}
...
...
imperative/src/impl/ops/cond_take.cpp
浏览文件 @
d19fc2c1
...
...
@@ -44,10 +44,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
DnnOprCaller
<
megdnn
::
CondTake
>
dnn_op
(
inp
->
comp_node
());
dnn_op
.
op
->
param
().
val
=
1
;
TensorLayout
m_layout
(
{
dnn_op
.
op
->
get_workspace_in_bytes
(
inp
->
layout
())},
dtype
::
Byte
());
size_t
sz
=
dnn_op
.
op
->
get_workspace_in_bytes
(
inp
->
layout
());
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
m_layout
);
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
sz
);
dnn_op
.
op
->
exec
(
inp
->
dev_tensor
().
as_megdnn
(),
msk
->
dev_tensor
().
as_megdnn
(),
...
...
imperative/src/impl/ops/convolution.cpp
浏览文件 @
d19fc2c1
...
...
@@ -165,11 +165,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
TensorLayout
empty_shp
({
0
},
inputs
[
0
]
->
dtype
());
empty_shp
.
ndim
=
0
;
DeviceTensorND
empty_bias
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
empty_shp
);
auto
empty_bias
=
Tensor
::
make
(
empty_shp
,
cn
);
inp_tensornds
[
2
]
=
empty_bias
.
as_megdnn
();
inp_tensornds
[
3
]
=
empty_bias
.
as_megdnn
();
inp_tensornds
[
2
]
=
empty_bias
->
dnn_tensor
();
inp_tensornds
[
3
]
=
empty_bias
->
dnn_tensor
();
size_t
sz
=
setup_algo
<
megdnn
::
ConvBiasForward
>
(
{
inp_shapes
[
0
],
inp_shapes
[
1
],
empty_shp
,
empty_shp
,
oup_shapes
[
0
]},
...
...
@@ -177,17 +176,15 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
&
inp_tensornds
);
// alloc memory
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
auto
out
=
Tensor
::
make
(
out_layout
,
cn
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
// execute
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
empty_bias
.
as_megdnn
()
,
empty_bias
.
as_megdnn
(),
out
.
as_megdnn
(),
nullptr
,
dnn_wk
);
return
{
Tensor
::
make
(
out
)
};
inp_tensornds
[
0
],
inp_tensornds
[
1
],
inp_tensornds
[
2
],
inp_tensornds
[
3
]
,
out
->
dnn_tensor
(),
nullptr
,
dnn_wk
);
return
{
out
};
}
OP_TRAIT_REG
(
Convolution
,
Convolution
,
opr
::
Convolution
)
...
...
@@ -368,6 +365,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
def
,
inputs
[
1
]
->
layout
().
ndim
,
inputs
[
0
]
->
layout
(),
inputs
[
1
]
->
layout
(),
cn
);
auto
out
=
Tensor
::
make
(
out_layout
,
cn
);
using
TensorND
=
megdnn
::
TensorND
;
SmallVector
<
TensorND
>
inp_tensornds
(
inputs
.
size
());
TensorLayoutArray
inp_shapes
(
inputs
.
size
()),
oup_shapes
(
output_descs
.
size
());
...
...
@@ -383,16 +382,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
{
inp_shapes
[
0
],
inp_shapes
[
1
],
oup_shapes
[
0
]},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
convbwd
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
auto
wk
=
Blob
::
make
(
cn
,
sz
);
auto
ptr
=
wk
->
storage
().
get
();
megdnn
::
Workspace
dnn_wk
(
ptr
,
sz
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
// execute
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out
)
};
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
->
dnn_tensor
(),
dnn_wk
);
return
{
out
};
}
OP_TRAIT_REG
(
ConvolutionBackwardData
,
ConvolutionBackwardData
)
...
...
@@ -549,18 +543,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
false
,
cn
,
conv
.
policy
(),
false
,
&
inp_tensornds
);
// alloc memory
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
auto
out
=
Tensor
::
make
(
out_layout
,
cn
);
megdnn
::
Workspace
dnn_wk
;
if
(
sz
!=
0
)
{
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
}
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
// execute
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out
)
};
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
->
dnn_tensor
(),
dnn_wk
);
return
{
out
};
}
OP_TRAIT_REG
(
Convolution3D
,
Convolution3D
,
opr
::
Convolution3D
)
...
...
@@ -615,8 +604,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
megdnn
::
Convolution3DBackwardData
::
deduce_layout_impl
(
wlayout
,
dlayout
,
op_def
.
param
(),
oup_layout
);
}
DeviceTensorND
oup
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
auto
oup
=
Tensor
::
make
(
oup_layout
,
cn
);
SmallVector
<
megdnn
::
TensorND
>
inp_tensornds
(
inputs
.
size
());
inp_tensornds
[
0
]
=
inputs
[
0
]
->
dnn_tensor
();
...
...
@@ -624,14 +612,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
size_t
wk_size
=
setup_algo
<
megdnn
::
Convolution3DBackwardData
>
(
{
wlayout
,
dlayout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
,
&
inp_tensornds
);
megdnn
::
Workspace
dnn_wk
;
if
(
wk_size
!=
0
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
auto
dnn_wk
=
caller
.
create_workspace
(
wk_size
);
dnn_opr
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
oup
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
oup
)
};
dnn_opr
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
oup
->
dnn_tensor
(),
dnn_wk
);
return
{
oup
};
}
auto
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
...
...
imperative/src/impl/ops/elemwise.cpp
浏览文件 @
d19fc2c1
...
...
@@ -121,10 +121,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
megdnn
::
Elemwise
::
deduce_shape
(
inp_shapes
,
layout
);
layout
.
init_contiguous_stride
();
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
layout
);
auto
out
=
Tensor
::
make
(
layout
,
comp_node
);
if
(
is_empty
)
{
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
DnnOprCaller
<
megdnn
::
Elemwise
>
dnn_opr
(
comp_node
);
...
...
@@ -133,12 +133,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
dnn_opr
.
op
->
param
().
mode
==
Mode
::
FUSE_MUL_ADD4
||
(
inp_tensornds
.
size
()
&&
inp_tensornds
[
0
].
layout
.
dtype
.
category
()
==
DTypeCategory
::
QUANTIZED
))
{
opr
::
Elemwise
::
perform_dnn
(
comp_node
,
out
,
inp_tensornds
,
dnn_opr
.
op
);
opr
::
Elemwise
::
perform_dnn
(
comp_node
,
out
->
dnn_tensor
(),
inp_tensornds
,
dnn_opr
.
op
);
}
else
{
dnn_opr
.
op
->
exec
(
inp_tensornds
,
out
.
as_megdnn
());
dnn_opr
.
op
->
exec
(
inp_tensornds
,
out
->
dnn_tensor
());
}
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
MGB_DEFINE_OPR_CLASS
(
...
...
imperative/src/impl/ops/indexing.cpp
浏览文件 @
d19fc2c1
...
...
@@ -85,10 +85,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
TensorPtr
out
=
Tensor
::
make
(
tlayout
,
inp
->
comp_node
());
megdnn
::
TensorND
in
=
inp
->
dnn_tensor
();
megdnn
::
TensorND
ind
=
index
->
dnn_tensor
();
TensorLayout
m_layout
(
{
dnn_op
.
op
->
get_workspace_in_bytes
(
layout
,
index_layout
,
tlayout
)},
dtype
::
Byte
());
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
m_layout
);
size_t
sz
=
dnn_op
.
op
->
get_workspace_in_bytes
(
layout
,
index_layout
,
tlayout
);
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
sz
);
dnn_op
.
op
->
exec
(
in
,
ind
,
out
->
dnn_tensor
(),
dnn_workspace
);
return
{
out
};
}
...
...
@@ -152,10 +151,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
megdnn
::
TensorND
in
=
inp
->
dnn_tensor
();
megdnn
::
TensorND
ind
=
index
->
dnn_tensor
();
megdnn
::
TensorND
su
=
sub
->
dnn_tensor
();
TensorLayout
m_layout
(
{
dnn_op
.
op
->
get_workspace_in_bytes
(
layout
,
index_layout
,
tlayout
)},
dtype
::
Byte
());
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
m_layout
);
size_t
sz
=
dnn_op
.
op
->
get_workspace_in_bytes
(
layout
,
index_layout
,
tlayout
);
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
sz
);
dnn_op
.
op
->
exec
(
out
->
dnn_tensor
(),
ind
,
su
,
dnn_workspace
);
return
{
out
};
}
...
...
imperative/src/impl/ops/lamb.cpp
浏览文件 @
d19fc2c1
...
...
@@ -45,29 +45,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
TensorLayout
v_t_1_layout
{
v_t_1
->
layout
()};
TensorLayout
lamb_param_layout
{
lamb_param
->
layout
()};
DeviceTensorND
m_t
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
m_t_1
->
comp_node
(),
m_t_1_layout
);
auto
m_t
=
Tensor
::
make
(
m_t_1_layout
,
m_t_1
->
comp_node
());
DeviceTensorND
v_t
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
v_t_1
->
comp_node
(),
v_t_1_layout
);
auto
v_t
=
Tensor
::
make
(
v_t_1_layout
,
v_t_1
->
comp_node
());
DeviceTensorND
new_param
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
lamb_param
->
comp_node
(),
lamb_param_layout
);
auto
new_param
=
Tensor
::
make
(
lamb_param_layout
,
lamb_param
->
comp_node
());
DnnOprCaller
<
megdnn
::
LAMBUpdate
>
caller
{
lamb_param
->
comp_node
()};
TensorLayout
m_layout
(
{
caller
.
op
->
get_workspace_in_bytes
(
m_t_1
->
layout
(),
v_t_1
->
layout
(),
lamb_param
->
layout
(),
grad
->
layout
(),
m_t
.
layout
(),
v_t
.
layout
(),
new_param
.
layout
())},
dtype
::
Byte
());
size_t
sz
=
caller
.
op
->
get_workspace_in_bytes
(
m_t_1
->
layout
(),
v_t_1
->
layout
(),
lamb_param
->
layout
(),
grad
->
layout
(),
m_t
->
layout
(),
v_t
->
layout
(),
new_param
->
layout
());
auto
dnn_workspace
=
caller
.
create_workspace
(
m_layout
);
auto
dnn_workspace
=
caller
.
create_workspace
(
sz
);
caller
.
op
->
param
()
=
op
.
param
();
caller
.
op
->
exec
(
m_t_1
->
dev_tensor
().
as_megdnn
(),
v_t_1
->
dev_tensor
().
as_megdnn
(),
lamb_param
->
dev_tensor
().
as_megdnn
(),
grad
->
dev_tensor
().
as_megdnn
(),
m_t
.
as_megdnn
(),
v_t
.
as_megdnn
(),
new_param
.
as_megdnn
(),
dnn_workspace
);
return
{
Tensor
::
make
(
m_t
),
Tensor
::
make
(
v_t
),
Tensor
::
make
(
new_param
)};
m_t
->
dnn_tensor
(),
v_t
->
dnn_tensor
(),
new_param
->
dnn_tensor
(),
dnn_workspace
);
return
{
m_t
,
v_t
,
new_param
};
}
OP_TRAIT_REG
(
LAMBUpdate
,
LAMBUpdate
)
...
...
imperative/src/impl/ops/layer_norm.cpp
浏览文件 @
d19fc2c1
...
...
@@ -77,32 +77,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
megdnn
::
LayerNorm
::
deduce_layout_fwd_impl
(
inputs
[
0
]
->
dnn_tensor
().
layout
,
p
,
oup_layout
,
mean_layout
,
rstd_layout
);
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
DeviceTensorND
mean_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
mean_layout
);
DeviceTensorND
rstd_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
rstd_layout
);
megdnn
::
Workspace
dnn_wk
;
auto
out
=
Tensor
::
make
(
oup_layout
,
cn
);
auto
mean
=
Tensor
::
make
(
mean_layout
,
cn
);
auto
rstd
=
Tensor
::
make
(
rstd_layout
,
cn
);
auto
wk_size
=
caller
.
op
->
get_workspace_in_bytes
(
inputs
[
0
]
->
dnn_tensor
().
layout
,
p
.
affine
?
inputs
[
1
]
->
dnn_tensor
().
layout
:
TensorLayout
(),
p
.
affine
?
inputs
[
2
]
->
dnn_tensor
().
layout
:
TensorLayout
(),
oup_layout
,
mean_layout
,
rstd_layout
);
if
(
wk_size
!=
0
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
auto
dnn_wk
=
caller
.
create_workspace
(
wk_size
);
dnn_opr
->
exec
(
caller
.
op
->
exec
(
inputs
[
0
]
->
dnn_tensor
(),
p
.
affine
?
inputs
[
1
]
->
dnn_tensor
()
:
megdnn
::
TensorND
(),
p
.
affine
?
inputs
[
2
]
->
dnn_tensor
()
:
megdnn
::
TensorND
(),
out_devtensor
.
as_megdnn
(),
mean_devtensor
.
as_megdnn
(),
rstd_devtensor
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out_devtensor
),
Tensor
::
make
(
mean_devtensor
),
Tensor
::
make
(
rstd_devtensor
)};
p
.
affine
?
inputs
[
2
]
->
dnn_tensor
()
:
megdnn
::
TensorND
(),
out
->
dnn_tensor
(),
mean
->
dnn_tensor
(),
rstd
->
dnn_tensor
(),
dnn_wk
);
return
{
out
,
mean
,
rstd
};
}
OP_TRAIT_REG
(
LayerNorm
,
LayerNorm
)
...
...
imperative/src/impl/ops/matmul.cpp
浏览文件 @
d19fc2c1
...
...
@@ -185,12 +185,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
}
if
(
dim1
==
0
||
dim2
==
0
||
layout1
[
layout1
.
ndim
-
1
]
==
0
)
{
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
real_dst_layout
);
if
(
!
out
.
empty
())
{
dev_tensor_memset
(
out
,
0
);
auto
out
=
Tensor
::
make
(
real_dst_layout
,
cn
);
if
(
!
out
->
empty
())
{
dev_tensor_memset
(
out
->
dev_tensor
()
,
0
);
}
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
TensorLayout
layout_a
=
layout1
,
layout_b
=
layout2
;
...
...
@@ -232,13 +232,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
size_t
sz
=
setup_algo
<
megdnn
::
MatrixMul
>
(
{
layout_a
,
layout_b
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
matmul
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
auto
out
=
Tensor
::
make
(
dst_layout
,
cn
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out
.
sub
(
SubTensorSpec
::
make_from_layout
(
real_dst_layout
))
)};
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
->
dnn_tensor
(),
dnn_wk
);
return
{
out
->
sub
(
0
,
real_dst_layout
)};
}
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
...
...
@@ -461,12 +459,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
dst_layout
.
init_contiguous_stride
();
if
(
dim1
==
0
||
dim2
==
0
||
layout1
[
layout1
.
ndim
-
1
]
==
0
)
{
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
if
(
!
out
.
empty
())
{
dev_tensor_memset
(
out
,
0
);
auto
out
=
Tensor
::
make
(
dst_layout
,
cn
);
if
(
!
out
->
empty
())
{
dev_tensor_memset
(
out
->
dev_tensor
()
,
0
);
}
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
SmallVector
<
megdnn
::
TensorND
>
inp_tensornds
(
2u
);
...
...
@@ -479,19 +477,17 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
{
layout1
,
layout2
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
matmul
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
auto
out
=
Tensor
::
make
(
dst_layout
,
cn
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
.
as_megdnn
(),
dnn_wk
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
->
dnn_tensor
(),
dnn_wk
);
shp1
[
shp1
.
ndim
-
2
]
=
dst_layout
[
dst_layout
.
ndim
-
2
];
shp1
[
shp1
.
ndim
-
1
]
=
dst_layout
[
dst_layout
.
ndim
-
1
];
if
(
maxdim
>
3
)
{
dst_layout
=
dst_layout
.
reshape
(
shp1
);
}
return
{
Tensor
::
make
(
out
.
sub
(
SubTensorSpec
::
make_from_layout
(
dst_layout
))
)};
return
{
out
->
sub
(
0
,
dst_layout
)};
}
SmallVector
<
VarNode
::
LayoutConstraintCallback
>
get_input_layout_constraint
(
...
...
@@ -540,27 +536,23 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
dnn_opr
.
op
->
deduce_layout
(
inp1_tensor
.
layout
,
inp2_tensor
.
layout
,
oup_layout
);
if
(
inputs
[
0
]
->
layout
().
is_empty
()
||
inputs
[
1
]
->
layout
().
is_empty
())
{
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
oup_layout
);
if
(
!
out
.
empty
())
{
dev_tensor_memset
(
out
,
0
);
auto
out
=
Tensor
::
make
(
oup_layout
,
comp_node
);
if
(
!
out
->
empty
())
{
dev_tensor_memset
(
out
->
dev_tensor
(),
0
);
}
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
auto
sz
=
dnn_opr
.
op
->
get_workspace_in_bytes
(
inp_tensornds
[
0
].
layout
,
inp_tensornds
[
1
].
layout
,
output_descs
[
0
].
layout
);
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
oup_layout
);
auto
out
=
Tensor
::
make
(
oup_layout
,
comp_node
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out_devtensor
.
as_megdnn
(),
dnn_wk
);
dnn_opr
.
op
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
],
out
->
dnn_tensor
(),
dnn_wk
);
return
{
Tensor
::
make
(
out_devtensor
)
};
return
{
out
};
}
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
...
...
imperative/src/impl/ops/misc.cpp
浏览文件 @
d19fc2c1
...
...
@@ -36,9 +36,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
megdnn
::
CheckNonFinite
::
Param
param
({
op
.
scale
});
dnn_opr
.
op
->
param
()
=
param
;
size_t
sz
=
dnn_opr
.
op
->
get_workspace_in_bytes
(
srcs
,
dest
->
layout
());
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
dnn_opr
.
op
->
exec
(
srcs
,
dest
->
dev_tensor
().
as_megdnn
(),
dnn_wk
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
srcs
,
dest
->
dnn_tensor
(),
dnn_wk
);
return
outputs
;
}
...
...
imperative/src/impl/ops/pooling.cpp
浏览文件 @
d19fc2c1
...
...
@@ -66,17 +66,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
{
inp_tensornds
[
0
].
layout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
auto
out
=
Tensor
::
make
(
oup_layout
,
cn
);
megdnn
::
Workspace
dnn_wk
;
if
(
wk_size
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
auto
dnn_wk
=
caller
.
create_workspace
(
wk_size
);
dnn_opr
->
exec
(
inp_tensornds
[
0
],
out_devtensor
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out_devtensor
)
};
caller
.
op
->
exec
(
inp_tensornds
[
0
],
out
->
dnn_tensor
(),
dnn_wk
);
return
{
out
};
}
OP_TRAIT_REG
(
Pooling
,
Pooling
)
...
...
imperative/src/impl/ops/reduce.cpp
浏览文件 @
d19fc2c1
...
...
@@ -117,20 +117,20 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
layout
.
remove_axis_inplace
(
axis
);
layout
.
init_contiguous_stride
();
}
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
layout
);
auto
out
=
Tensor
::
make
(
layout
,
comp_node
);
std
::
string
err_msg
;
switch
(
mode
)
{
case
Reduce
::
Mode
::
SUM
:
if
(
!
out
.
empty
())
{
dev_tensor_memset
(
out
,
0
);
if
(
!
out
->
empty
())
{
dev_tensor_memset
(
out
->
dev_tensor
()
,
0
);
}
break
;
case
Reduce
::
Mode
::
PRODUCT
:
if
(
!
out
.
empty
())
{
if
(
!
out
->
empty
())
{
DnnOprCaller
<
megdnn
::
Fill
>
fill_op
(
comp_node
);
fill_op
.
op
->
param
()
=
1
;
fill_op
.
op
->
exec
(
out
.
as_megdnn
(),
{});
fill_op
.
op
->
exec
(
out
->
dnn_tensor
(),
{});
}
break
;
case
Reduce
::
Mode
::
MEAN
:
...
...
@@ -153,34 +153,29 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
MegBrainError
,
"empty input is not allowed for reduce mode: %s"
,
err_msg
.
c_str
());
}
return
{
Tensor
::
make
(
out
)
};
return
{
out
};
}
auto
dnn_ten
=
inputs
[
0
]
->
dnn_tensor
();
dnn_ten
.
layout
=
src
;
inp_tensornds
.
push_back
(
dnn_ten
);
megdnn
::
Workspace
dnn_wk
;
auto
wk_size
=
dnn_op
.
op
->
get_workspace_in_bytes
(
src
,
layout
);
if
(
wk_size
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
dnn_op
.
create_workspace
(
w_layout
);
}
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
comp_node
,
layout
);
dnn_op
.
op
->
exec
(
inp_tensornds
[
0
],
out
.
as_megdnn
(),
dnn_wk
);
auto
dnn_wk
=
dnn_op
.
create_workspace
(
wk_size
);
TensorLayout
ori_layout
=
layout
;
if
(
!
keepdim
&&
src
.
ndim
>
1
)
{
auto
out_layout
=
out
.
layout
();
out_layout
.
remove_axis_inplace
(
axis
);
out_layout
.
init_contiguous_stride
();
out
.
resize
(
out_layout
);
layout
.
remove_axis_inplace
(
axis
);
layout
.
init_contiguous_stride
();
}
return
{
Tensor
::
make
(
out
)};
auto
out
=
Tensor
::
make
(
layout
,
comp_node
);
auto
dnn_out
=
out
->
dnn_tensor
();
dnn_out
.
layout
=
ori_layout
;
dnn_op
.
op
->
exec
(
inp_tensornds
[
0
],
dnn_out
,
dnn_wk
);
return
{
out
};
}
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
...
...
imperative/src/impl/ops/tensor_manip.cpp
浏览文件 @
d19fc2c1
...
...
@@ -252,9 +252,8 @@ SmallVector<TensorPtr> param_pack_concat_apply_on_physical_tensor(
HostTensorStorage
srcs_storage
;
srcs_storage
.
reset
(
comp_node
,
srcs_size
,
srcs_ptr
);
caller
.
op
->
exec
(
{
srcs_raw_ptr
,
srcs_layout
},
inputs
.
back
()
->
dev_tensor
().
as_megdnn
(),
output
->
dev_tensor
().
as_megdnn
(),
caller
.
create_workspace
({{
ws_size
},
dtype
::
Byte
()}));
{
srcs_raw_ptr
,
srcs_layout
},
inputs
.
back
()
->
dnn_tensor
(),
output
->
dnn_tensor
(),
caller
.
create_workspace
(
ws_size
));
async_release
(
HostTensorND
{
comp_node
,
srcs_layout
}.
storage
(
srcs_storage
));
return
{
output
};
}
...
...
imperative/src/impl/ops/vision.cpp
浏览文件 @
d19fc2c1
...
...
@@ -89,8 +89,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
size_t
sz
=
dnn_opr
.
op
->
get_workspace_in_bytes
(
inputs
[
0
]
->
layout
(),
inputs
[
1
]
->
layout
(),
out_layout
,
ind_layout
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
sz
);
dnn_opr
.
op
->
exec
(
inputs
[
0
]
->
dnn_tensor
(),
inputs
[
1
]
->
dnn_tensor
(),
out
.
as_megdnn
(),
...
...
imperative/src/impl/physical_tensor.cpp
浏览文件 @
d19fc2c1
...
...
@@ -566,9 +566,13 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) {
return
ret
;
}
bool
Tensor
::
empty
()
{
return
!
m_blob
->
size
();
}
megdnn
::
TensorND
Tensor
::
dnn_tensor
()
{
mgb_assert
(
m_blob
,
"uninitialized tensor."
);
return
{
m_layout
,
{
m_blob
->
storage
().
get
(),
m_offset
}
};
return
DnnTensorND
{
m_layout
,
m_blob
->
storage
(),
m_offset
};
}
void
Tensor
::
fetch_value
()
{
...
...
imperative/src/include/megbrain/imperative/physical_tensor.h
浏览文件 @
d19fc2c1
...
...
@@ -10,6 +10,7 @@
#include "megbrain/imperative/resource_manager.h"
#include "megbrain/tensor.h"
#include "megbrain/utils/metahelper.h"
#include "megdnn/basic_types.h"
namespace
mgb
{
namespace
imperative
{
...
...
@@ -87,6 +88,22 @@ using EventPtr = std::unique_ptr<CompNode::Event, EventDeleter>;
class
Tensor
;
using
TensorPtr
=
std
::
shared_ptr
<
Tensor
>
;
/*
using DnnTensorND to save the reference count of workspace
allocted by blobmanager to prevent invalidation
*/
struct
DnnTensorND
:
megdnn
::
TensorND
{
private:
std
::
shared_ptr
<
dt_byte
>
m_reference
;
public:
DnnTensorND
(
TensorLayout
&
layout_
,
std
::
shared_ptr
<
dt_byte
>
ref_ptr
,
size_t
offset
)
:
megdnn
::
TensorND
(
layout_
,
{
ref_ptr
.
get
(),
offset
})
{
m_reference
=
ref_ptr
;
}
};
class
Tensor
:
public
NonCopyableObj
{
public:
Tensor
()
=
default
;
...
...
@@ -131,6 +148,8 @@ public:
void
to_contiguous_inplace
();
bool
empty
();
DeviceTensorND
dev_tensor
(
bool
contiguous
=
true
);
void
assign_from_dev_tensor
(
DeviceTensorND
);
...
...
src/opr/impl/basic_arith.cpp
浏览文件 @
d19fc2c1
...
...
@@ -258,9 +258,9 @@ void Elemwise::perform(
}
void
Elemwise
::
perform_dnn
(
CompNode
cn
,
Device
TensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
CompNode
cn
,
const
megdnn
::
TensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
)
{
call_megdnn_opr_exec
(
cn
,
inputs
,
dest
.
as_megdnn
()
,
opr
.
get
(),
nullptr
);
call_megdnn_opr_exec
(
cn
,
inputs
,
dest
,
opr
.
get
(),
nullptr
);
}
TensorLayoutArray
Elemwise
::
collective_collapse
(
const
TensorLayoutArray
&
layouts
)
{
...
...
src/opr/include/megbrain/opr/basic_arith.h
浏览文件 @
d19fc2c1
...
...
@@ -78,7 +78,7 @@ public:
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
);
MGE_WIN_DECLSPEC_FUC
static
void
perform_dnn
(
CompNode
cn
,
Device
TensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
CompNode
cn
,
const
megdnn
::
TensorND
&
dest
,
megdnn
::
TensorNDArray
&
inputs
,
intl
::
UniqPtrWithCN
<
megdnn
::
Elemwise
>&
opr
);
using
TensorLayoutPtrArray
=
SmallVector
<
TensorLayout
*>
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录